Diffstat (limited to 'eigen/unsupported/Eigen')
181 files changed, 0 insertions, 53343 deletions
diff --git a/eigen/unsupported/Eigen/AdolcForward b/eigen/unsupported/Eigen/AdolcForward deleted file mode 100644 index 15f5f07..0000000 --- a/eigen/unsupported/Eigen/AdolcForward +++ /dev/null @@ -1,156 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ADLOC_FORWARD -#define EIGEN_ADLOC_FORWARD - -//-------------------------------------------------------------------------------- -// -// This file provides support for adolc's adouble type in forward mode. -// ADOL-C is a C++ automatic differentiation library, -// see https://projects.coin-or.org/ADOL-C for more information. -// -// Note that the maximal number of directions is controlled by -// the preprocessor token NUMBER_DIRECTIONS. The default is 2. -// -//-------------------------------------------------------------------------------- - -#define ADOLC_TAPELESS -#ifndef NUMBER_DIRECTIONS -# define NUMBER_DIRECTIONS 2 -#endif -#include <adolc/adtl.h> - -// adolc defines some very stupid macros: -#if defined(malloc) -# undef malloc -#endif - -#if defined(calloc) -# undef calloc -#endif - -#if defined(realloc) -# undef realloc -#endif - -#include <Eigen/Core> - -namespace Eigen { - -/** - * \defgroup AdolcForward_Module Adolc forward module - * This module provides support for adolc's adouble type in forward mode. - * ADOL-C is a C++ automatic differentiation library, - * see https://projects.coin-or.org/ADOL-C for more information. - * It mainly consists in: - * - a struct Eigen::NumTraits<adtl::adouble> specialization - * - overloads of internal::* math function for adtl::adouble type. - * - * Note that the maximal number of directions is controlled by - * the preprocessor token NUMBER_DIRECTIONS. The default is 2. 
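 *
 * As a quick, hedged usage sketch (the functor below and its member names are
 * purely illustrative and not part of this module): define a functor exposing
 * the InputType/ValueType/JacobianType typedefs and a templated operator(),
 * then wrap it in AdolcForwardJacobian to obtain values and Jacobians:
 * \code
 * struct MyFunctor {
 *   typedef Eigen::Vector2d InputType;
 *   typedef Eigen::Vector2d ValueType;
 *   typedef Eigen::Matrix2d JacobianType;
 *   template<typename T>
 *   void operator()(const Eigen::Matrix<T,2,1>& x, Eigen::Matrix<T,2,1>* v) const {
 *     (*v)(0) = x(0) * x(1);   // example equations
 *     (*v)(1) = x(0) + x(1);
 *   }
 * };
 * Eigen::AdolcForwardJacobian<MyFunctor> f;
 * Eigen::Vector2d x(1.0, 2.0), value;
 * Eigen::Matrix2d jac;
 * f(x, &value, &jac);          // fills both the value and the Jacobian
 * \endcode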
- * - * \code - * #include <unsupported/Eigen/AdolcSupport> - * \endcode - */ - //@{ - -} // namespace Eigen - -// Eigen's require a few additional functions which must be defined in the same namespace -// than the custom scalar type own namespace -namespace adtl { - -inline const adouble& conj(const adouble& x) { return x; } -inline const adouble& real(const adouble& x) { return x; } -inline adouble imag(const adouble&) { return 0.; } -inline adouble abs(const adouble& x) { return fabs(x); } -inline adouble abs2(const adouble& x) { return x*x; } - -} - -namespace Eigen { - -template<> struct NumTraits<adtl::adouble> - : NumTraits<double> -{ - typedef adtl::adouble Real; - typedef adtl::adouble NonInteger; - typedef adtl::adouble Nested; - enum { - IsComplex = 0, - IsInteger = 0, - IsSigned = 1, - RequireInitialization = 1, - ReadCost = 1, - AddCost = 1, - MulCost = 1 - }; -}; - -template<typename Functor> class AdolcForwardJacobian : public Functor -{ - typedef adtl::adouble ActiveScalar; -public: - - AdolcForwardJacobian() : Functor() {} - AdolcForwardJacobian(const Functor& f) : Functor(f) {} - - // forward constructors - template<typename T0> - AdolcForwardJacobian(const T0& a0) : Functor(a0) {} - template<typename T0, typename T1> - AdolcForwardJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} - template<typename T0, typename T1, typename T2> - AdolcForwardJacobian(const T0& a0, const T1& a1, const T1& a2) : Functor(a0, a1, a2) {} - - typedef typename Functor::InputType InputType; - typedef typename Functor::ValueType ValueType; - typedef typename Functor::JacobianType JacobianType; - - typedef Matrix<ActiveScalar, InputType::SizeAtCompileTime, 1> ActiveInput; - typedef Matrix<ActiveScalar, ValueType::SizeAtCompileTime, 1> ActiveValue; - - void operator() (const InputType& x, ValueType* v, JacobianType* _jac) const - { - eigen_assert(v!=0); - if (!_jac) - { - Functor::operator()(x, v); - return; - } - - JacobianType& jac = *_jac; - - ActiveInput ax = x.template cast<ActiveScalar>(); - ActiveValue av(jac.rows()); - - for (int j=0; j<jac.cols(); j++) - for (int i=0; i<jac.cols(); i++) - ax[i].setADValue(j, i==j ? 1 : 0); - - Functor::operator()(ax, &av); - - for (int i=0; i<jac.rows(); i++) - { - (*v)[i] = av[i].getValue(); - for (int j=0; j<jac.cols(); j++) - jac.coeffRef(i,j) = av[i].getADValue(j); - } - } -protected: - -}; - -//@} - -} - -#endif // EIGEN_ADLOC_FORWARD diff --git a/eigen/unsupported/Eigen/AlignedVector3 b/eigen/unsupported/Eigen/AlignedVector3 deleted file mode 100644 index 47a86d4..0000000 --- a/eigen/unsupported/Eigen/AlignedVector3 +++ /dev/null @@ -1,224 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ALIGNED_VECTOR3 -#define EIGEN_ALIGNED_VECTOR3 - -#include <Eigen/Geometry> - -namespace Eigen { - -/** - * \defgroup AlignedVector3_Module Aligned vector3 module - * - * \code - * #include <unsupported/Eigen/AlignedVector3> - * \endcode - */ - //@{ - - -/** \class AlignedVector3 - * - * \brief A vectorization friendly 3D vector - * - * This class represents a 3D vector internally using a 4D vector - * such that vectorization can be seamlessly enabled. Of course, - * the same result can be achieved by directly using a 4D vector. 
- * This class makes this process simpler. - * - */ -// TODO specialize Cwise -template<typename _Scalar> class AlignedVector3; - -namespace internal { -template<typename _Scalar> struct traits<AlignedVector3<_Scalar> > - : traits<Matrix<_Scalar,3,1,0,4,1> > -{ -}; -} - -template<typename _Scalar> class AlignedVector3 - : public MatrixBase<AlignedVector3<_Scalar> > -{ - typedef Matrix<_Scalar,4,1> CoeffType; - CoeffType m_coeffs; - public: - - typedef MatrixBase<AlignedVector3<_Scalar> > Base; - EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3) - using Base::operator*; - - inline Index rows() const { return 3; } - inline Index cols() const { return 1; } - - Scalar* data() { return m_coeffs.data(); } - const Scalar* data() const { return m_coeffs.data(); } - Index innerStride() const { return 1; } - Index outerStride() const { return 3; } - - inline const Scalar& coeff(Index row, Index col) const - { return m_coeffs.coeff(row, col); } - - inline Scalar& coeffRef(Index row, Index col) - { return m_coeffs.coeffRef(row, col); } - - inline const Scalar& coeff(Index index) const - { return m_coeffs.coeff(index); } - - inline Scalar& coeffRef(Index index) - { return m_coeffs.coeffRef(index);} - - - inline AlignedVector3(const Scalar& x, const Scalar& y, const Scalar& z) - : m_coeffs(x, y, z, Scalar(0)) - {} - - inline AlignedVector3(const AlignedVector3& other) - : Base(), m_coeffs(other.m_coeffs) - {} - - template<typename XprType, int Size=XprType::SizeAtCompileTime> - struct generic_assign_selector {}; - - template<typename XprType> struct generic_assign_selector<XprType,4> - { - inline static void run(AlignedVector3& dest, const XprType& src) - { - dest.m_coeffs = src; - } - }; - - template<typename XprType> struct generic_assign_selector<XprType,3> - { - inline static void run(AlignedVector3& dest, const XprType& src) - { - dest.m_coeffs.template head<3>() = src; - dest.m_coeffs.w() = Scalar(0); - } - }; - - template<typename Derived> - inline AlignedVector3(const MatrixBase<Derived>& other) - { - generic_assign_selector<Derived>::run(*this,other.derived()); - } - - inline AlignedVector3& operator=(const AlignedVector3& other) - { m_coeffs = other.m_coeffs; return *this; } - - template <typename Derived> - inline AlignedVector3& operator=(const MatrixBase<Derived>& other) - { - generic_assign_selector<Derived>::run(*this,other.derived()); - return *this; - } - - inline AlignedVector3 operator+(const AlignedVector3& other) const - { return AlignedVector3(m_coeffs + other.m_coeffs); } - - inline AlignedVector3& operator+=(const AlignedVector3& other) - { m_coeffs += other.m_coeffs; return *this; } - - inline AlignedVector3 operator-(const AlignedVector3& other) const - { return AlignedVector3(m_coeffs - other.m_coeffs); } - - inline AlignedVector3 operator-=(const AlignedVector3& other) - { m_coeffs -= other.m_coeffs; return *this; } - - inline AlignedVector3 operator*(const Scalar& s) const - { return AlignedVector3(m_coeffs * s); } - - inline friend AlignedVector3 operator*(const Scalar& s,const AlignedVector3& vec) - { return AlignedVector3(s * vec.m_coeffs); } - - inline AlignedVector3& operator*=(const Scalar& s) - { m_coeffs *= s; return *this; } - - inline AlignedVector3 operator/(const Scalar& s) const - { return AlignedVector3(m_coeffs / s); } - - inline AlignedVector3& operator/=(const Scalar& s) - { m_coeffs /= s; return *this; } - - inline Scalar dot(const AlignedVector3& other) const - { - eigen_assert(m_coeffs.w()==Scalar(0)); - eigen_assert(other.m_coeffs.w()==Scalar(0)); - return 
m_coeffs.dot(other.m_coeffs); - } - - inline void normalize() - { - m_coeffs /= norm(); - } - - inline AlignedVector3 normalized() const - { - return AlignedVector3(m_coeffs / norm()); - } - - inline Scalar sum() const - { - eigen_assert(m_coeffs.w()==Scalar(0)); - return m_coeffs.sum(); - } - - inline Scalar squaredNorm() const - { - eigen_assert(m_coeffs.w()==Scalar(0)); - return m_coeffs.squaredNorm(); - } - - inline Scalar norm() const - { - using std::sqrt; - return sqrt(squaredNorm()); - } - - inline AlignedVector3 cross(const AlignedVector3& other) const - { - return AlignedVector3(m_coeffs.cross3(other.m_coeffs)); - } - - template<typename Derived> - inline bool isApprox(const MatrixBase<Derived>& other, const RealScalar& eps=NumTraits<Scalar>::dummy_precision()) const - { - return m_coeffs.template head<3>().isApprox(other,eps); - } - - CoeffType& coeffs() { return m_coeffs; } - const CoeffType& coeffs() const { return m_coeffs; } -}; - -namespace internal { - -template<typename _Scalar> -struct eval<AlignedVector3<_Scalar>, Dense> -{ - typedef const AlignedVector3<_Scalar>& type; -}; - -template<typename Scalar> -struct evaluator<AlignedVector3<Scalar> > - : evaluator<Matrix<Scalar,4,1> > -{ - typedef AlignedVector3<Scalar> XprType; - typedef evaluator<Matrix<Scalar,4,1> > Base; - - evaluator(const XprType &m) : Base(m.coeffs()) {} -}; - -} - -//@} - -} - -#endif // EIGEN_ALIGNED_VECTOR3 diff --git a/eigen/unsupported/Eigen/ArpackSupport b/eigen/unsupported/Eigen/ArpackSupport deleted file mode 100644 index 37a2799..0000000 --- a/eigen/unsupported/Eigen/ArpackSupport +++ /dev/null @@ -1,31 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ARPACKSUPPORT_MODULE_H -#define EIGEN_ARPACKSUPPORT_MODULE_H - -#include <Eigen/Core> - -#include <Eigen/src/Core/util/DisableStupidWarnings.h> - -/** \defgroup ArpackSupport_Module Arpack support module - * - * This module provides a wrapper to Arpack, a library for sparse eigenvalue decomposition. - * - * \code - * #include <Eigen/ArpackSupport> - * \endcode - */ - -#include <Eigen/SparseCholesky> -#include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h" - -#include <Eigen/src/Core/util/ReenableStupidWarnings.h> - -#endif // EIGEN_ARPACKSUPPORT_MODULE_H -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/eigen/unsupported/Eigen/AutoDiff b/eigen/unsupported/Eigen/AutoDiff deleted file mode 100644 index abf5b7d..0000000 --- a/eigen/unsupported/Eigen/AutoDiff +++ /dev/null @@ -1,40 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_AUTODIFF_MODULE -#define EIGEN_AUTODIFF_MODULE - -namespace Eigen { - -/** - * \defgroup AutoDiff_Module Auto Diff module - * - * This module features forward automatic differentation via a simple - * templated scalar type wrapper AutoDiffScalar. 
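 *
 * A minimal, hedged usage sketch (illustrative only; the derivative layout
 * shown is just the common "seed one direction per variable" convention):
 * \code
 * typedef Eigen::AutoDiffScalar<Eigen::Vector2d> ADouble;  // scalar carrying 2 derivatives
 * ADouble x(1.0, 2, 0);          // value 1.0, seeds d/dx
 * ADouble y(2.0, 2, 1);          // value 2.0, seeds d/dy
 * ADouble z = x * sin(y);        // derivatives propagate through the expression
 * // z.value() is 1.0*sin(2.0); z.derivatives() is the gradient (sin(2.0), 1.0*cos(2.0))
 * \endcode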
- * - * Warning : this should NOT be confused with numerical differentiation, which - * is a different method and has its own module in Eigen : \ref NumericalDiff_Module. - * - * \code - * #include <unsupported/Eigen/AutoDiff> - * \endcode - */ -//@{ - -} - -#include "src/AutoDiff/AutoDiffScalar.h" -// #include "src/AutoDiff/AutoDiffVector.h" -#include "src/AutoDiff/AutoDiffJacobian.h" - -namespace Eigen { -//@} -} - -#endif // EIGEN_AUTODIFF_MODULE diff --git a/eigen/unsupported/Eigen/BVH b/eigen/unsupported/Eigen/BVH deleted file mode 100644 index 0161a54..0000000 --- a/eigen/unsupported/Eigen/BVH +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Ilya Baran <ibaran@mit.edu> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BVH_MODULE_H -#define EIGEN_BVH_MODULE_H - -#include <Eigen/Core> -#include <Eigen/Geometry> -#include <Eigen/StdVector> -#include <algorithm> -#include <queue> - -namespace Eigen { - -/** - * \defgroup BVH_Module BVH module - * \brief This module provides generic bounding volume hierarchy algorithms - * and reference tree implementations. - * - * - * \code - * #include <unsupported/Eigen/BVH> - * \endcode - * - * A bounding volume hierarchy (BVH) can accelerate many geometric queries. This module provides a generic implementation - * of the two basic algorithms over a BVH: intersection of a query object against all objects in the hierarchy and minimization - * of a function over the objects in the hierarchy. It also provides intersection and minimization over a cartesian product of - * two BVH's. A BVH accelerates intersection by using the fact that if a query object does not intersect a volume, then it cannot - * intersect any object contained in that volume. Similarly, a BVH accelerates minimization because the minimum of a function - * over a volume is no greater than the minimum of a function over any object contained in it. 
- * - * Some sample queries that can be written in terms of intersection are: - * - Determine all points where a ray intersects a triangle mesh - * - Given a set of points, determine which are contained in a query sphere - * - Given a set of spheres, determine which contain the query point - * - Given a set of disks, determine if any is completely contained in a query rectangle (represent each 2D disk as a point \f$(x,y,r)\f$ - * in 3D and represent the rectangle as a pyramid based on the original rectangle and shrinking in the \f$r\f$ direction) - * - Given a set of points, count how many pairs are \f$d\pm\epsilon\f$ apart (done by looking at the cartesian product of the set - * of points with itself) - * - * Some sample queries that can be written in terms of function minimization over a set of objects are: - * - Find the intersection between a ray and a triangle mesh closest to the ray origin (function is infinite off the ray) - * - Given a polyline and a query point, determine the closest point on the polyline to the query - * - Find the diameter of a point cloud (done by looking at the cartesian product and using negative distance as the function) - * - Determine how far two meshes are from colliding (this is also a cartesian product query) - * - * This implementation decouples the basic algorithms both from the type of hierarchy (and the types of the bounding volumes) and - * from the particulars of the query. To enable abstraction from the BVH, the BVH is required to implement a generic mechanism - * for traversal. To abstract from the query, the query is responsible for keeping track of results. - * - * To be used in the algorithms, a hierarchy must implement the following traversal mechanism (see KdBVH for a sample implementation): \code - typedef Volume //the type of bounding volume - typedef Object //the type of object in the hierarchy - typedef Index //a reference to a node in the hierarchy--typically an int or a pointer - typedef VolumeIterator //an iterator type over node children--returns Index - typedef ObjectIterator //an iterator over object (leaf) children--returns const Object & - Index getRootIndex() const //returns the index of the hierarchy root - const Volume &getVolume(Index index) const //returns the bounding volume of the node at given index - void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, - ObjectIterator &outOBegin, ObjectIterator &outOEnd) const - //getChildren takes a node index and makes [outVBegin, outVEnd) range over its node children - //and [outOBegin, outOEnd) range over its object children - \endcode - * - * To use the hierarchy, call BVIntersect or BVMinimize, passing it a BVH (or two, for cartesian product) and a minimizer or intersector. - * For an intersection query on a single BVH, the intersector encapsulates the query and must provide two functions: - * \code - bool intersectVolume(const Volume &volume) //returns true if the query intersects the volume - bool intersectObject(const Object &object) //returns true if the intersection search should terminate immediately - \endcode - * The guarantee that BVIntersect provides is that intersectObject will be called on every object whose bounding volume - * intersects the query (but possibly on other objects too) unless the search is terminated prematurely. It is the - * responsibility of the intersectObject function to keep track of the results in whatever manner is appropriate. 
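 *
 * As a hedged illustration (the struct and names below are invented for this
 * example and are not part of the module), an intersector that counts which
 * points of a KdBVH<float,3,Vector3f> fall inside a query sphere could look like:
 * \code
 struct SphereIntersector {
   SphereIntersector(const Eigen::Vector3f &c, float r) : center(c), radius(r), count(0) {}
   // the sphere intersects a box iff the box's exterior distance to the center is <= radius
   bool intersectVolume(const Eigen::AlignedBox3f &box)
   { return box.squaredExteriorDistance(center) <= radius * radius; }
   // count matching points; returning false means "do not terminate the search"
   bool intersectObject(const Eigen::Vector3f &p)
   { if((p - center).squaredNorm() <= radius * radius) ++count; return false; }
   Eigen::Vector3f center; float radius; int count;
 };
 // usage sketch: build the tree from an iterator range of Vector3f, then query it
 // (KdBVH also expects a free function bounding_box(const Vector3f&) returning an
 //  AlignedBox3f so it can compute the leaf volumes):
 //   Eigen::KdBVH<float, 3, Eigen::Vector3f> tree(points.begin(), points.end());
 //   SphereIntersector query(center, radius);
 //   Eigen::BVIntersect(tree, query);
 \endcode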
- * The cartesian product intersection and the BVMinimize queries are similar--see their individual documentation. - * - * The following is a simple but complete example for how to use the BVH to accelerate the search for a closest red-blue point pair: - * \include BVH_Example.cpp - * Output: \verbinclude BVH_Example.out - */ -} - -//@{ - -#include "src/BVH/BVAlgorithms.h" -#include "src/BVH/KdBVH.h" - -//@} - -#endif // EIGEN_BVH_MODULE_H diff --git a/eigen/unsupported/Eigen/CMakeLists.txt b/eigen/unsupported/Eigen/CMakeLists.txt deleted file mode 100644 index 631a060..0000000 --- a/eigen/unsupported/Eigen/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -set(Eigen_HEADERS - AdolcForward - AlignedVector3 - ArpackSupport - AutoDiff - BVH - EulerAngles - FFT - IterativeSolvers - KroneckerProduct - LevenbergMarquardt - MatrixFunctions - MoreVectorization - MPRealSupport - NonLinearOptimization - NumericalDiff - OpenGLSupport - Polynomials - Skyline - SparseExtra - SpecialFunctions - Splines - ) - -install(FILES - ${Eigen_HEADERS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel - ) - -install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") - -add_subdirectory(CXX11) diff --git a/eigen/unsupported/Eigen/CXX11/CMakeLists.txt b/eigen/unsupported/Eigen/CXX11/CMakeLists.txt deleted file mode 100644 index 385ed24..0000000 --- a/eigen/unsupported/Eigen/CXX11/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(Eigen_CXX11_HEADERS Tensor TensorSymmetry ThreadPool) - -install(FILES - ${Eigen_CXX11_HEADERS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel - ) - -install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/eigen/unsupported/Eigen/CXX11/Tensor b/eigen/unsupported/Eigen/CXX11/Tensor deleted file mode 100644 index bb6523d..0000000 --- a/eigen/unsupported/Eigen/CXX11/Tensor +++ /dev/null @@ -1,154 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -//#ifndef EIGEN_CXX11_TENSOR_MODULE -//#define EIGEN_CXX11_TENSOR_MODULE - -#include "../../../Eigen/Core" - -#ifdef EIGEN_USE_SYCL -#undef min -#undef max -#undef isnan -#undef isinf -#undef isfinite -#include <SYCL/sycl.hpp> -#include <map> -#include <memory> -#include <utility> -#endif - -#include <Eigen/src/Core/util/DisableStupidWarnings.h> - -#include "../SpecialFunctions" -#include "src/util/CXX11Meta.h" -#include "src/util/MaxSizeVector.h" - -/** \defgroup CXX11_Tensor_Module Tensor Module - * - * This module provides a Tensor class for storing arbitrarily indexed - * objects. - * - * \code - * #include <Eigen/CXX11/Tensor> - * \endcode - * - * Much of the documentation can be found \ref eigen_tensors "here". 
- */ - -#include <cmath> -#include <cstddef> -#include <cstring> - -#ifdef _WIN32 -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#else -#include <stdint.h> -#endif - -#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 -#include <random> -#endif - -#ifdef _WIN32 -#include <windows.h> -#elif defined(__APPLE__) -#include <mach/mach_time.h> -#else -#include <time.h> -#endif - -#ifdef EIGEN_USE_THREADS -#include "ThreadPool" -#endif - -#ifdef EIGEN_USE_GPU -#include <iostream> -#include <cuda_runtime.h> -#if __cplusplus >= 201103L -#include <atomic> -#include <unistd.h> -#endif -#endif - -#include "src/Tensor/TensorMacros.h" -#include "src/Tensor/TensorForwardDeclarations.h" -#include "src/Tensor/TensorMeta.h" -#include "src/Tensor/TensorFunctors.h" -#include "src/Tensor/TensorCostModel.h" -#include "src/Tensor/TensorDeviceDefault.h" -#include "src/Tensor/TensorDeviceThreadPool.h" -#include "src/Tensor/TensorDeviceCuda.h" -#include "src/Tensor/TensorDeviceSycl.h" -#include "src/Tensor/TensorIndexList.h" -#include "src/Tensor/TensorDimensionList.h" -#include "src/Tensor/TensorDimensions.h" -#include "src/Tensor/TensorInitializer.h" -#include "src/Tensor/TensorTraits.h" -#include "src/Tensor/TensorRandom.h" -#include "src/Tensor/TensorUInt128.h" -#include "src/Tensor/TensorIntDiv.h" -#include "src/Tensor/TensorGlobalFunctions.h" - -#include "src/Tensor/TensorBase.h" - -#include "src/Tensor/TensorEvaluator.h" -#include "src/Tensor/TensorExpr.h" -#include "src/Tensor/TensorReduction.h" -#include "src/Tensor/TensorReductionCuda.h" -#include "src/Tensor/TensorArgMax.h" -#include "src/Tensor/TensorConcatenation.h" -#include "src/Tensor/TensorContractionMapper.h" -#include "src/Tensor/TensorContractionBlocking.h" -#include "src/Tensor/TensorContraction.h" -#include "src/Tensor/TensorContractionThreadPool.h" -#include "src/Tensor/TensorContractionCuda.h" -#include "src/Tensor/TensorConversion.h" -#include "src/Tensor/TensorConvolution.h" -#include "src/Tensor/TensorFFT.h" -#include "src/Tensor/TensorPatch.h" -#include "src/Tensor/TensorImagePatch.h" -#include "src/Tensor/TensorVolumePatch.h" -#include "src/Tensor/TensorBroadcasting.h" -#include "src/Tensor/TensorChipping.h" -#include "src/Tensor/TensorInflation.h" -#include "src/Tensor/TensorLayoutSwap.h" -#include "src/Tensor/TensorMorphing.h" -#include "src/Tensor/TensorPadding.h" -#include "src/Tensor/TensorReverse.h" -#include "src/Tensor/TensorShuffling.h" -#include "src/Tensor/TensorStriding.h" -#include "src/Tensor/TensorCustomOp.h" -#include "src/Tensor/TensorEvalTo.h" -#include "src/Tensor/TensorForcedEval.h" -#include "src/Tensor/TensorGenerator.h" -#include "src/Tensor/TensorAssign.h" -#include "src/Tensor/TensorScan.h" - -#include "src/Tensor/TensorSycl.h" -#include "src/Tensor/TensorExecutor.h" -#include "src/Tensor/TensorDevice.h" - -#include "src/Tensor/TensorStorage.h" -#include "src/Tensor/Tensor.h" -#include "src/Tensor/TensorFixedSize.h" -#include "src/Tensor/TensorMap.h" -#include "src/Tensor/TensorRef.h" - -#include "src/Tensor/TensorIO.h" - -#include <Eigen/src/Core/util/ReenableStupidWarnings.h> - -//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/eigen/unsupported/Eigen/CXX11/TensorSymmetry b/eigen/unsupported/Eigen/CXX11/TensorSymmetry deleted file mode 100644 index fb1b0c0..0000000 --- a/eigen/unsupported/Eigen/CXX11/TensorSymmetry +++ /dev/null @@ -1,42 +0,0 @@ -// This file is part of 
Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE -#define EIGEN_CXX11_TENSORSYMMETRY_MODULE - -#include <unsupported/Eigen/CXX11/Tensor> - -#include <Eigen/src/Core/util/DisableStupidWarnings.h> - -#include "src/util/CXX11Meta.h" - -/** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module - * - * This module provides a classes that allow for the definition of - * symmetries w.r.t. tensor indices. - * - * Including this module will implicitly include the Tensor module. - * - * \code - * #include <Eigen/TensorSymmetry> - * \endcode - */ - -#include "src/TensorSymmetry/util/TemplateGroupTheory.h" -#include "src/TensorSymmetry/Symmetry.h" -#include "src/TensorSymmetry/StaticSymmetry.h" -#include "src/TensorSymmetry/DynamicSymmetry.h" - -#include <Eigen/src/Core/util/ReenableStupidWarnings.h> - -#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/ThreadPool b/eigen/unsupported/Eigen/CXX11/ThreadPool deleted file mode 100644 index 09d637e..0000000 --- a/eigen/unsupported/Eigen/CXX11/ThreadPool +++ /dev/null @@ -1,65 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_MODULE -#define EIGEN_CXX11_THREADPOOL_MODULE - -#include "../../../Eigen/Core" - -#include <Eigen/src/Core/util/DisableStupidWarnings.h> - -/** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module - * - * This module provides 2 threadpool implementations - * - a simple reference implementation - * - a faster non blocking implementation - * - * This module requires C++11. - * - * \code - * #include <Eigen/CXX11/ThreadPool> - * \endcode - */ - - -// The code depends on CXX11, so only include the module if the -// compiler supports it. 
-#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 -#include <cstddef> -#include <cstring> -#include <stdint.h> -#include <time.h> - -#include <vector> -#include <atomic> -#include <condition_variable> -#include <deque> -#include <mutex> -#include <thread> -#include <functional> -#include <memory> - -#include "src/util/CXX11Meta.h" -#include "src/util/MaxSizeVector.h" - -#include "src/ThreadPool/ThreadLocal.h" -#include "src/ThreadPool/ThreadYield.h" -#include "src/ThreadPool/EventCount.h" -#include "src/ThreadPool/RunQueue.h" -#include "src/ThreadPool/ThreadPoolInterface.h" -#include "src/ThreadPool/ThreadEnvironment.h" -#include "src/ThreadPool/SimpleThreadPool.h" -#include "src/ThreadPool/NonBlockingThreadPool.h" - -#endif - -#include <Eigen/src/Core/util/ReenableStupidWarnings.h> - -#endif // EIGEN_CXX11_THREADPOOL_MODULE - diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md b/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md deleted file mode 100644 index da70fa2..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +++ /dev/null @@ -1,1760 +0,0 @@ -# Eigen Tensors {#eigen_tensors} - -Tensors are multidimensional arrays of elements. Elements are typically scalars, -but more complex types such as strings are also supported. - -[TOC] - -## Tensor Classes - -You can manipulate a tensor with one of the following classes. They all are in -the namespace `::Eigen.` - - -### Class Tensor<data_type, rank> - -This is the class to use to create a tensor and allocate memory for it. The -class is templatized with the tensor datatype, such as float or int, and the -tensor rank. The rank is the number of dimensions, for example rank 2 is a -matrix. - -Tensors of this class are resizable. For example, if you assign a tensor of a -different size to a Tensor, that tensor is resized to match its new value. - -#### Constructor `Tensor<data_type, rank>(size0, size1, ...)` - -Constructor for a Tensor. The constructor must be passed `rank` integers -indicating the sizes of the instance along each of the the `rank` -dimensions. - - // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns - // memory to hold 24 floating point values (24 = 2 x 3 x 4). - Tensor<float, 3> t_3d(2, 3, 4); - - // Resize t_3d by assigning a tensor of different sizes, but same rank. - t_3d = Tensor<float, 3>(3, 4, 3); - -#### Constructor `Tensor<data_type, rank>(size_array)` - -Constructor where the sizes for the constructor are specified as an array of -values instead of an explicitly list of parameters. The array type to use is -`Eigen::array<Eigen::Index>`. The array can be constructed automatically -from an initializer list. - - // Create a tensor of strings of rank 2 with sizes 5, 7. - Tensor<string, 2> t_2d({5, 7}); - - -### Class `TensorFixedSize<data_type, Sizes<size0, size1, ...>>` - -Class to use for tensors of fixed size, where the size is known at compile -time. Fixed sized tensors can provide very fast computations because all their -dimensions are known by the compiler. FixedSize tensors are not resizable. - -If the total number of elements in a fixed size tensor is small enough the -tensor data is held onto the stack and does not cause heap allocation and free. - - // Create a 4 x 3 tensor of floats. - TensorFixedSize<float, Sizes<4, 3>> t_4x3; - -### Class `TensorMap<Tensor<data_type, rank>>` - -This is the class to use to create a tensor on top of memory allocated and -owned by another part of your code. It allows to view any piece of allocated -memory as a Tensor. 
Instances of this class do not own the memory where the -data are stored. - -A TensorMap is not resizable because it does not own the memory where its data -are stored. - -#### Constructor `TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...)` - -Constructor for a Tensor. The constructor must be passed a pointer to the -storage for the data, and "rank" size attributes. The storage has to be -large enough to hold all the data. - - // Map a tensor of ints on top of stack-allocated storage. - int storage[128]; // 2 x 4 x 2 x 8 = 128 - TensorMap<Tensor<int, 4>> t_4d(storage, 2, 4, 2, 8); - - // The same storage can be viewed as a different tensor. - // You can also pass the sizes as an array. - TensorMap<Tensor<int, 2>> t_2d(storage, 16, 8); - - // You can also map fixed-size tensors. Here we get a 1d view of - // the 2d fixed-size tensor. - TensorFixedSize<float, Sizes<4, 5>> t_4x3; - TensorMap<Tensor<float, 1>> t_12(t_4x3.data(), 12); - - -#### Class `TensorRef` - -See Assigning to a TensorRef below. - -## Accessing Tensor Elements - -#### `<data_type> tensor(index0, index1...)` - -Return the element at position `(index0, index1...)` in tensor -`tensor`. You must pass as many parameters as the rank of `tensor`. -The expression can be used as an l-value to set the value of the element at the -specified position. The value returned is of the datatype of the tensor. - - // Set the value of the element at position (0, 1, 0); - Tensor<float, 3> t_3d(2, 3, 4); - t_3d(0, 1, 0) = 12.0f; - - // Initialize all elements to random values. - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 3; ++j) { - for (int k = 0; k < 4; ++k) { - t_3d(i, j, k) = ...some random value...; - } - } - } - - // Print elements of a tensor. - for (int i = 0; i < 2; ++i) { - LOG(INFO) << t_3d(i, 0, 0); - } - - -## TensorLayout - -The tensor library supports 2 layouts: `ColMajor` (the default) and -`RowMajor`. Only the default column major layout is currently fully -supported, and it is therefore not recommended to attempt to use the row major -layout at the moment. - -The layout of a tensor is optionally specified as part of its type. If not -specified explicitly column major is assumed. - - Tensor<float, 3, ColMajor> col_major; // equivalent to Tensor<float, 3> - TensorMap<Tensor<float, 3, RowMajor> > row_major(data, ...); - -All the arguments to an expression must use the same layout. Attempting to mix -different layouts will result in a compilation error. - -It is possible to change the layout of a tensor or an expression using the -`swap_layout()` method. Note that this will also reverse the order of the -dimensions. - - Tensor<float, 2, ColMajor> col_major(2, 4); - Tensor<float, 2, RowMajor> row_major(2, 4); - - Tensor<float, 2> col_major_result = col_major; // ok, layouts match - Tensor<float, 2> col_major_result = row_major; // will not compile - - // Simple layout swap - col_major_result = row_major.swap_layout(); - eigen_assert(col_major_result.dimension(0) == 4); - eigen_assert(col_major_result.dimension(1) == 2); - - // Swap the layout and preserve the order of the dimensions - array<int, 2> shuffle(1, 0); - col_major_result = row_major.swap_layout().shuffle(shuffle); - eigen_assert(col_major_result.dimension(0) == 2); - eigen_assert(col_major_result.dimension(1) == 4); - - -## Tensor Operations - -The Eigen Tensor library provides a vast library of operations on Tensors: -numerical operations such as addition and multiplication, geometry operations -such as slicing and shuffling, etc. 
These operations are available as methods -of the Tensor classes, and in some cases as operator overloads. For example -the following code computes the elementwise addition of two tensors: - - Tensor<float, 3> t1(2, 3, 4); - ...set some values in t1... - Tensor<float, 3> t2(2, 3, 4); - ...set some values in t2... - // Set t3 to the element wise sum of t1 and t2 - Tensor<float, 3> t3 = t1 + t2; - -While the code above looks easy enough, it is important to understand that the -expression `t1 + t2` is not actually adding the values of the tensors. The -expression instead constructs a "tensor operator" object of the class -TensorCwiseBinaryOp<scalar_sum>, which has references to the tensors -`t1` and `t2`. This is a small C++ object that knows how to add -`t1` and `t2`. It is only when the value of the expression is assigned -to the tensor `t3` that the addition is actually performed. Technically, -this happens through the overloading of `operator=()` in the Tensor class. - -This mechanism for computing tensor expressions allows for lazy evaluation and -optimizations which are what make the tensor library very fast. - -Of course, the tensor operators do nest, and the expression `t1 + t2 * 0.3f` -is actually represented with the (approximate) tree of operators: - - TensorCwiseBinaryOp<scalar_sum>(t1, TensorCwiseUnaryOp<scalar_mul>(t2, 0.3f)) - - -### Tensor Operations and C++ "auto" - -Because Tensor operations create tensor operators, the C++ `auto` keyword -does not have its intuitive meaning. Consider these 2 lines of code: - - Tensor<float, 3> t3 = t1 + t2; - auto t4 = t1 + t2; - -In the first line we allocate the tensor `t3` and it will contain the -result of the addition of `t1` and `t2`. In the second line, `t4` -is actually the tree of tensor operators that will compute the addition of -`t1` and `t2`. In fact, `t4` is *not* a tensor and you cannot get -the values of its elements: - - Tensor<float, 3> t3 = t1 + t2; - cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0) - - auto t4 = t1 + t2; - cout << t4(0, 0, 0); // Compilation error! - -When you use `auto` you do not get a Tensor as a result but instead a -non-evaluated expression. So only use `auto` to delay evaluation. - -Unfortunately, there is no single underlying concrete type for holding -non-evaluated expressions, hence you have to use auto in the case when you do -want to hold non-evaluated expressions. - -When you need the results of set of tensor computations you have to assign the -result to a Tensor that will be capable of holding onto them. This can be -either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing -piece of memory. All the following will work: - - auto t4 = t1 + t2; - - Tensor<float, 3> result = t4; // Could also be: result(t4); - cout << result(0, 0, 0); - - TensorMap<float, 4> result(<a float* with enough space>, <size0>, ...) = t4; - cout << result(0, 0, 0); - - TensorFixedSize<float, Sizes<size0, ...>> result = t4; - cout << result(0, 0, 0); - -Until you need the results, you can keep the operation around, and even reuse -it for additional operations. As long as you keep the expression as an -operation, no computation is performed. 
- - // One way to compute exp((t1 + t2) * 0.2f); - auto t3 = t1 + t2; - auto t4 = t3 * 0.2f; - auto t5 = t4.exp(); - Tensor<float, 3> result = t5; - - // Another way, exactly as efficient as the previous one: - Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp(); - -### Controlling When Expression are Evaluated - -There are several ways to control when expressions are evaluated: - -* Assignment to a Tensor, TensorFixedSize, or TensorMap. -* Use of the eval() method. -* Assignment to a TensorRef. - -#### Assigning to a Tensor, TensorFixedSize, or TensorMap. - -The most common way to evaluate an expression is to assign it to a Tensor. In -the example below, the `auto` declarations make the intermediate values -"Operations", not Tensors, and do not cause the expressions to be evaluated. -The assignment to the Tensor `result` causes the evaluation of all the -operations. - - auto t3 = t1 + t2; // t3 is an Operation. - auto t4 = t3 * 0.2f; // t4 is an Operation. - auto t5 = t4.exp(); // t5 is an Operation. - Tensor<float, 3> result = t5; // The operations are evaluated. - -If you know the ranks and sizes of the Operation value you can assign the -Operation to a TensorFixedSize instead of a Tensor, which is a bit more -efficient. - - // We know that the result is a 4x4x2 tensor! - TensorFixedSize<float, Sizes<4, 4, 2>> result = t5; - -Simiarly, assigning an expression to a TensorMap causes its evaluation. Like -tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to -have the rank and sizes of the expression that are assigned to them. - -#### Calling `eval()`. - -When you compute large composite expressions, you sometimes want to tell Eigen -that an intermediate value in the expression tree is worth evaluating ahead of -time. This is done by inserting a call to the `eval()` method of the -expression Operation. - - // The previous example could have been written: - Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp(); - - // If you want to compute (t1 + t2) once ahead of time you can write: - Tensor<float, 3> result = ((t1 + t2).eval() * 0.2f).exp(); - -Semantically, calling `eval()` is equivalent to materializing the value of -the expression in a temporary Tensor of the right size. The code above in -effect does: - - // .eval() knows the size! - TensorFixedSize<float, Sizes<4, 4, 2>> tmp = t1 + t2; - Tensor<float, 3> result = (tmp * 0.2f).exp(); - -Note that the return value of `eval()` is itself an Operation, so the -following code does not do what you may think: - - // Here t3 is an evaluation Operation. t3 has not been evaluated yet. - auto t3 = (t1 + t2).eval(); - - // You can use t3 in another expression. Still no evaluation. - auto t4 = (t3 * 0.2f).exp(); - - // The value is evaluated when you assign the Operation to a Tensor, using - // an intermediate tensor to represent t3.x - Tensor<float, 3> result = t4; - -While in the examples above calling `eval()` does not make a difference in -performance, in other cases it can make a huge difference. 
In the expression -below the `broadcast()` expression causes the `X.maximum()` expression -to be evaluated many times: - - Tensor<...> X ...; - Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast)) - * beta).exp(); - -Inserting a call to `eval()` between the `maximum()` and -`reshape()` calls guarantees that maximum() is only computed once and -greatly speeds-up execution: - - Tensor<...> Y = - ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) - * beta).exp(); - -In the other example below, the tensor `Y` is both used in the expression -and its assignment. This is an aliasing problem and if the evaluation is not -done in the right order Y will be updated incrementally during the evaluation -resulting in bogus results: - - Tensor<...> Y ...; - Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast)); - -Inserting a call to `eval()` between the `sum()` and `reshape()` -expressions ensures that the sum is computed before any updates to `Y` are -done. - - Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); - -Note that an eval around the full right hand side expression is not needed -because the generated has to compute the i-th value of the right hand side -before assigning it to the left hand side. - -However, if you were assigning the expression value to a shuffle of `Y` -then you would need to force an eval for correctness by adding an `eval()` -call for the right hand side: - - Y.shuffle(...) = - (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval(); - - -#### Assigning to a `TensorRef`. - -If you need to access only a few elements from the value of an expression you -can avoid materializing the value in a full tensor by using a TensorRef. - -A TensorRef is a small wrapper class for any Eigen Operation. It provides -overloads for the `()` operator that let you access individual values in -the expression. TensorRef is convenient, because the Operation themselves do -not provide a way to access individual elements. - - // Create a TensorRef for the expression. The expression is not - // evaluated yet. - TensorRef<Tensor<float, 3> > ref = ((t1 + t2) * 0.2f).exp(); - - // Use "ref" to access individual elements. The expression is evaluated - // on the fly. - float at_0 = ref(0, 0, 0); - cout << ref(0, 1, 0); - -Only use TensorRef when you need a subset of the values of the expression. -TensorRef only computes the values you access. However note that if you are -going to access all the values it will be much faster to materialize the -results in a Tensor first. - -In some cases, if the full Tensor result would be very large, you may save -memory by accessing it as a TensorRef. But not always. So don't count on it. - - -### Controlling How Expressions Are Evaluated - -The tensor library provides several implementations of the various operations -such as contractions and convolutions. The implementations are optimized for -different environments: single threaded on CPU, multi threaded on CPU, or on a -GPU using cuda. Additional implementations may be added later. - -You can choose which implementation to use with the `device()` call. If -you do not choose an implementation explicitly the default implementation that -uses a single thread on the CPU is used. - -The default implementation has been optimized for recent Intel CPUs, taking -advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the -library on ARM CPUs. 
Note that you need to pass compiler-dependent flags -to enable the use of SSE, AVX, and other instructions. - -For example, the following code adds two tensors using the default -single-threaded CPU implementation: - - Tensor<float, 2> a(30, 40); - Tensor<float, 2> b(30, 40); - Tensor<float, 2> c = a + b; - -To choose a different implementation you have to insert a `device()` call -before the assignment of the result. For technical C++ reasons this requires -that the Tensor for the result be declared on its own. This means that you -have to know the size of the result. - - Eigen::Tensor<float, 2> c(30, 40); - c.device(...) = a + b; - -The call to `device()` must be the last call on the left of the operator=. - -You must pass to the `device()` call an Eigen device object. There are -presently three devices you can use: DefaultDevice, ThreadPoolDevice and -GpuDevice. - - -#### Evaluating With the DefaultDevice - -This is exactly the same as not inserting a `device()` call. - - DefaultDevice my_device; - c.device(my_device) = a + b; - -#### Evaluating with a Thread Pool - - // Create the Eigen ThreadPoolDevice. - Eigen::ThreadPoolDevice my_device(4 /* number of threads to use */); - - // Now just use the device when evaluating expressions. - Eigen::Tensor<float, 2> c(30, 50); - c.device(my_device) = a.contract(b, dot_product_dims); - - -#### Evaluating On GPU - -This is presently a bit more complicated than just using a thread pool device. -You need to create a GPU device but you also need to explicitly allocate the -memory for tensors with cuda. - - -## API Reference - -### Datatypes - -In the documentation of the tensor methods and Operation we mention datatypes -that are tensor-type specific: - -#### `<Tensor-Type>::``Dimensions` - -Acts like an array of ints. Has an `int size` attribute, and can be -indexed like an array to access individual values. Used to represent the -dimensions of a tensor. See `dimensions()`. - -#### `<Tensor-Type>::``Index` - -Acts like an `int`. Used for indexing tensors along their dimensions. See -`operator()`, `dimension()`, and `size()`. - -#### `<Tensor-Type>::``Scalar` - -Represents the datatype of individual tensor elements. For example, for a -`Tensor<float>`, `Scalar` is the type `float`. See -`setConstant()`. - -#### `<Operation>` - -We use this pseudo type to indicate that a tensor Operation is returned by a -method. We indicate in the text the type and dimensions of the tensor that the -Operation returns after evaluation. - -The Operation will have to be evaluated, for example by assigning it to a -tensor, before you can access the values of the resulting tensor. You can also -access the values through a TensorRef. - - -## Built-in Tensor Methods - -These are usual C++ methods that act on tensors immediately. They are not -Operations which provide delayed evaluation of their results. Unless specified -otherwise, all the methods listed below are available on all tensor classes: -Tensor, TensorFixedSize, and TensorMap. - -## Metadata - -### `int NumDimensions` - -Constant value indicating the number of dimensions of a Tensor. This is also -known as the tensor "rank". - - Eigen::Tensor<float, 2> a(3, 4); - cout << "Dims " << a.NumDimensions; - => Dims 2 - -### `Dimensions dimensions()` - -Returns an array-like object representing the dimensions of the tensor. -The actual type of the `dimensions()` result is `<Tensor-Type>::``Dimensions`. 
- - Eigen::Tensor<float, 2> a(3, 4); - const Eigen::Tensor<float, 2>::Dimensions& d = a.dimensions(); - cout << "Dim size: " << d.size << ", dim 0: " << d[0] - << ", dim 1: " << d[1]; - => Dim size: 2, dim 0: 3, dim 1: 4 - -If you use a C++11 compiler, you can use `auto` to simplify the code: - - const auto& d = a.dimensions(); - cout << "Dim size: " << d.size << ", dim 0: " << d[0] - << ", dim 1: " << d[1]; - => Dim size: 2, dim 0: 3, dim 1: 4 - -### `Index dimension(Index n)` - -Returns the n-th dimension of the tensor. The actual type of the -`dimension()` result is `<Tensor-Type>::``Index`, but you can -always use it like an int. - - Eigen::Tensor<float, 2> a(3, 4); - int dim1 = a.dimension(1); - cout << "Dim 1: " << dim1; - => Dim 1: 4 - -### `Index size()` - -Returns the total number of elements in the tensor. This is the product of all -the tensor dimensions. The actual type of the `size()` result is -`<Tensor-Type>::``Index`, but you can always use it like an int. - - Eigen::Tensor<float, 2> a(3, 4); - cout << "Size: " << a.size(); - => Size: 12 - - -### Getting Dimensions From An Operation - -A few operations provide `dimensions()` directly, -e.g. `TensorReslicingOp`. Most operations defer calculating dimensions -until the operation is being evaluated. If you need access to the dimensions -of a deferred operation, you can wrap it in a TensorRef (see Assigning to a -TensorRef above), which provides `dimensions()` and `dimension()` as -above. - -TensorRef can also wrap the plain Tensor types, so this is a useful idiom in -templated contexts where the underlying object could be either a raw Tensor -or some deferred operation (e.g. a slice of a Tensor). In this case, the -template code can wrap the object in a TensorRef and reason about its -dimensionality while remaining agnostic to the underlying type. - - -## Constructors - -### Tensor - -Creates a tensor of the specified size. The number of arguments must be equal -to the rank of the tensor. The content of the tensor is not initialized. - - Eigen::Tensor<float, 2> a(3, 4); - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - -### TensorFixedSize - -Creates a tensor of the specified size. The number of arguments in the Sizes<> -template parameter determines the rank of the tensor. The content of the tensor -is not initialized. - - Eigen::TensorFixedSize<float, Sizes<3, 4>> a; - cout << "Rank: " << a.rank() << endl; - => Rank: 2 - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - -### TensorMap - -Creates a tensor mapping an existing array of data. The data must not be freed -until the TensorMap is discarded, and the size of the data must be large enough -to accommodate the coefficients of the tensor. - - float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - Eigen::TensorMap<Tensor<float, 2>> a(data, 3, 4); - cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; - => NumRows: 3 NumCols: 4 - cout << "a(1, 2): " << a(1, 2) << endl; - => a(1, 2): 7 - - -## Contents Initialization - -When a new Tensor or a new TensorFixedSize are created, memory is allocated to -hold all the tensor elements, but the memory is not initialized. Similarly, -when a new TensorMap is created on top of non-initialized memory the memory its -contents are not initialized. - -You can use one of the methods below to initialize the tensor memory. 
These -have an immediate effect on the tensor and return the tensor itself as a -result. These are not tensor Operations which delay evaluation. - -### `<Tensor-Type> setConstant(const Scalar& val)` - -Sets all elements of the tensor to the constant value `val`. `Scalar` -is the type of data stored in the tensor. You can pass any value that is -convertible to that type. - -Returns the tensor itself in case you want to chain another call. - - a.setConstant(12.3f); - cout << "Constant: " << endl << a << endl << endl; - => - Constant: - 12.3 12.3 12.3 12.3 - 12.3 12.3 12.3 12.3 - 12.3 12.3 12.3 12.3 - -Note that `setConstant()` can be used on any tensor where the element type -has a copy constructor and an `operator=()`: - - Eigen::Tensor<string, 2> a(2, 3); - a.setConstant("yolo"); - cout << "String tensor: " << endl << a << endl << endl; - => - String tensor: - yolo yolo yolo - yolo yolo yolo - - -### `<Tensor-Type> setZero()` - -Fills the tensor with zeros. Equivalent to `setConstant(Scalar(0))`. -Returns the tensor itself in case you want to chain another call. - - a.setZero(); - cout << "Zeros: " << endl << a << endl << endl; - => - Zeros: - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - - -### `<Tensor-Type> setValues({..initializer_list})` - -Fills the tensor with explicit values specified in a std::initializer_list. -The type of the initializer list depends on the type and rank of the tensor. - -If the tensor has rank N, the initializer list must be nested N times. The -most deeply nested lists must contains P scalars of the Tensor type where P is -the size of the last dimension of the Tensor. - -For example, for a `TensorFixedSize<float, 2, 3>` the initializer list must -contains 2 lists of 3 floats each. - -`setValues()` returns the tensor itself in case you want to chain another -call. - - Eigen::Tensor<float, 2> a(2, 3); - a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}}); - cout << "a" << endl << a << endl << endl; - => - a - 0 1 2 - 3 4 5 - -If a list is too short, the corresponding elements of the tensor will not be -changed. This is valid at each level of nesting. For example the following -code only sets the values of the first row of the tensor. - - Eigen::Tensor<int, 2> a(2, 3); - a.setConstant(1000); - a.setValues({{10, 20, 30}}); - cout << "a" << endl << a << endl << endl; - => - a - 10 20 30 - 1000 1000 1000 - -### `<Tensor-Type> setRandom()` - -Fills the tensor with random values. Returns the tensor itself in case you -want to chain another call. - - a.setRandom(); - cout << "Random: " << endl << a << endl << endl; - => - Random: - 0.680375 0.59688 -0.329554 0.10794 - -0.211234 0.823295 0.536459 -0.0452059 - 0.566198 -0.604897 -0.444451 0.257742 - -You can customize `setRandom()` by providing your own random number -generator as a template argument: - - a.setRandom<MyRandomGenerator>(); - -Here, `MyRandomGenerator` must be a struct with the following member -functions, where Scalar and Index are the same as `<Tensor-Type>::``Scalar` -and `<Tensor-Type>::``Index`. - -See `struct UniformRandomGenerator` in TensorFunctors.h for an example. - - // Custom number generator for use with setRandom(). - struct MyRandomGenerator { - // Default and copy constructors. Both are needed - MyRandomGenerator() { } - MyRandomGenerator(const MyRandomGenerator& ) { } - - // Return a random value to be used. "element_location" is the - // location of the entry to set in the tensor, it can typically - // be ignored. 
- Scalar operator()(Eigen::DenseIndex element_location, - Eigen::DenseIndex /*unused*/ = 0) const { - return <randomly generated value of type T>; - } - - // Same as above but generates several numbers at a time. - typename internal::packet_traits<Scalar>::type packetOp( - Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const { - return <a packet of randomly generated values>; - } - }; - -You can also use one of the 2 random number generators that are part of the -tensor library: -* UniformRandomGenerator -* NormalRandomGenerator - - -## Data Access - -The Tensor, TensorFixedSize, and TensorRef classes provide the following -accessors to access the tensor coefficients: - - const Scalar& operator()(const array<Index, NumIndices>& indices) - const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - Scalar& operator()(const array<Index, NumIndices>& indices) - Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - -The number of indices must be equal to the rank of the tensor. Moreover, these -accessors are not available on tensor expressions. In order to access the -values of a tensor expression, the expression must either be evaluated or -wrapped in a TensorRef. - - -### `Scalar* data()` and `const Scalar* data() const` - -Returns a pointer to the storage for the tensor. The pointer is const if the -tensor was const. This allows direct access to the data. The layout of the -data depends on the tensor layout: RowMajor or ColMajor. - -This access is usually only needed for special cases, for example when mixing -Eigen Tensor code with other libraries. - -Scalar is the type of data stored in the tensor. - - Eigen::Tensor<float, 2> a(3, 4); - float* a_data = a.data(); - a_data[0] = 123.45f; - cout << "a(0, 0): " << a(0, 0); - => a(0, 0): 123.45 - - -## Tensor Operations - -All the methods documented below return non evaluated tensor `Operations`. -These can be chained: you can apply another Tensor Operation to the value -returned by the method. - -The chain of Operation is evaluated lazily, typically when it is assigned to a -tensor. See "Controlling when Expression are Evaluated" for more details about -their evaluation. - -### `<Operation> constant(const Scalar& val)` - -Returns a tensor of the same type and dimensions as the original tensor but -where all elements have the value `val`. - -This is useful, for example, when you want to add or subtract a constant from a -tensor, or multiply every element of a tensor by a scalar. - - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = a + a.constant(2.0f); - Eigen::Tensor<float, 2> c = b * b.constant(0.2f); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - cout << "c" << endl << c << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - 3 3 3 - 3 3 3 - - c - 0.6 0.6 0.6 - 0.6 0.6 0.6 - -### `<Operation> random()` - -Returns a tensor of the same type and dimensions as the current tensor -but where all elements have random values. - -This is for example useful to add random values to an existing tensor. -The generation of random values can be customized in the same manner -as for `setRandom()`. 
- - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = a + a.random(); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - 1.68038 1.5662 1.82329 - 0.788766 1.59688 0.395103 - - -## Unary Element Wise Operations - -All these operations take a single input tensor as argument and return a tensor -of the same type and dimensions as the tensor to which they are applied. The -requested operations are applied to each element independently. - -### `<Operation> operator-()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the opposite values of the original tensor. - - Eigen::Tensor<float, 2> a(2, 3); - a.setConstant(1.0f); - Eigen::Tensor<float, 2> b = -a; - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 1 1 - 1 1 1 - - b - -1 -1 -1 - -1 -1 -1 - -### `<Operation> sqrt()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the square roots of the original tensor. - -### `<Operation> rsqrt()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the inverse square roots of the original tensor. - -### `<Operation> square()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the squares of the original tensor values. - -### `<Operation> inverse()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the inverse of the original tensor values. - -### `<Operation> exp()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the exponential of the original tensor. - -### `<Operation> log()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the natural logarithms of the original tensor. - -### `<Operation> abs()` - -Returns a tensor of the same type and dimensions as the original tensor -containing the absolute values of the original tensor. - -### `<Operation> pow(Scalar exponent)` - -Returns a tensor of the same type and dimensions as the original tensor -containing the coefficients of the original tensor to the power of the -exponent. - -The type of the exponent, Scalar, is always the same as the type of the -tensor coefficients. For example, only integer exponents can be used in -conjuntion with tensors of integer values. - -You can use cast() to lift this restriction. For example this computes -cubic roots of an int Tensor: - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 1, 8}, {27, 64, 125}}); - Eigen::Tensor<double, 2> b = a.cast<double>().pow(1.0 / 3.0); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 0 1 8 - 27 64 125 - - b - 0 1 2 - 3 4 5 - -### `<Operation> operator * (Scalar scale)` - -Multiplies all the coefficients of the input tensor by the provided scale. - -### `<Operation> cwiseMax(Scalar threshold)` -TODO - -### `<Operation> cwiseMin(Scalar threshold)` -TODO - -### `<Operation> unaryExpr(const CustomUnaryOp& func)` -TODO - - -## Binary Element Wise Operations - -These operations take two input tensors as arguments. The 2 input tensors should -be of the same type and dimensions. The result is a tensor of the same -dimensions as the tensors to which they are applied, and unless otherwise -specified it is also of the same type. The requested operations are applied to -each pair of elements independently. 
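-
-For instance, the element wise operators documented below can be freely mixed
-and chained in a single expression; a minimal sketch (the sizes and constant
-values here are arbitrary, chosen only for illustration):
-
-    Eigen::Tensor<float, 2> a(2, 3);
-    Eigen::Tensor<float, 2> b(2, 3);
-    a.setConstant(2.0f);
-    b.setConstant(3.0f);
-    // Coefficient wise sum, product and maximum of the two inputs.
-    Eigen::Tensor<float, 2> sum = a + b;        // every element is 5
-    Eigen::Tensor<float, 2> prod = a * b;       // every element is 6
-    Eigen::Tensor<float, 2> mx = a.cwiseMax(b); // every element is 3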
-
-### `<Operation> operator+(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise sums of the inputs.
-
-### `<Operation> operator-(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise differences of the inputs.
-
-### `<Operation> operator*(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise products of the inputs.
-
-### `<Operation> operator/(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise quotients of the inputs.
-
-This operator is not supported for integer types.
-
-### `<Operation> cwiseMax(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise maximums of the inputs.
-
-### `<Operation> cwiseMin(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise minimums of the inputs.
-
-### `<Operation> Logical operators`
-
-The following logical operators are supported as well:
-
-* operator&&(const OtherDerived& other)
-* operator||(const OtherDerived& other)
-* operator<(const OtherDerived& other)
-* operator<=(const OtherDerived& other)
-* operator>(const OtherDerived& other)
-* operator>=(const OtherDerived& other)
-* operator==(const OtherDerived& other)
-* operator!=(const OtherDerived& other)
-
-They all return a tensor of boolean values.
-
-
-## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor))
-
-Selection is a coefficient-wise ternary operator that is the tensor equivalent
-to the if-then-else operation.
-
-    Tensor<bool, 3> if_tensor = ...;
-    Tensor<float, 3> then_tensor = ...;
-    Tensor<float, 3> else_tensor = ...;
-    Tensor<float, 3> result = if_tensor.select(then_tensor, else_tensor);
-
-The 3 arguments must be of the same dimensions, which will also be the
-dimensions of the result. The 'if' tensor must be of type boolean, the 'then'
-and the 'else' tensor must be of the same type, which will also be the type of
-the result.
-
-Each coefficient in the result is equal to the corresponding coefficient in the
-'then' tensor if the corresponding value in the 'if' tensor is true. If not, the
-resulting coefficient will come from the 'else' tensor.
-
-
-## Contraction
-
-Tensor *contractions* are a generalization of the matrix product to the
-multidimensional case.
-
-    // Create 2 matrices using tensors of rank 2
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{1, 2, 3}, {6, 5, 4}});
-    Eigen::Tensor<int, 2> b(3, 2);
-    b.setValues({{1, 2}, {4, 5}, {5, 6}});
-
-    // Compute the traditional matrix product
-    Eigen::array<Eigen::IndexPair<int>, 1> product_dims = { Eigen::IndexPair<int>(1, 0) };
-    Eigen::Tensor<int, 2> AB = a.contract(b, product_dims);
-
-    // Compute the product of the transposes of the matrices
-    Eigen::array<Eigen::IndexPair<int>, 1> transposed_product_dims = { Eigen::IndexPair<int>(0, 1) };
-    Eigen::Tensor<int, 2> AtBt = a.contract(b, transposed_product_dims);
-
-    // Contraction to a scalar value using a double contraction.
-    // The first coordinates of both tensors are contracted, as well as both second
-    // coordinates, i.e., this computes the sum of the squares of the elements.
- Eigen::array<Eigen::IndexPair<int>, 2> double_contraction_product_dims = { Eigen::IndexPair<int>(0, 0), Eigen::IndexPair<int>(1, 1) }; - Eigen::Tensor<int, 0> AdoubleContractedA = a.contract(a, double_contraction_product_dims); - - // Extracting the scalar value of the tensor contraction for further usage - int value = AdoubleContractedA(0); - -## Reduction Operations - -A *Reduction* operation returns a tensor with fewer dimensions than the -original tensor. The values in the returned tensor are computed by applying a -*reduction operator* to slices of values from the original tensor. You specify -the dimensions along which the slices are made. - -The Eigen Tensor library provides a set of predefined reduction operators such -as `maximum()` and `sum()` and lets you define additional operators by -implementing a few methods from a reductor template. - -### Reduction Dimensions - -All reduction operations take a single parameter of type -`<TensorType>::``Dimensions` which can always be specified as an array of -ints. These are called the "reduction dimensions." The values are the indices -of the dimensions of the input tensor over which the reduction is done. The -parameter can have at most as many element as the rank of the input tensor; -each element must be less than the tensor rank, as it indicates one of the -dimensions to reduce. - -Each dimension of the input tensor should occur at most once in the reduction -dimensions as the implementation does not remove duplicates. - -The order of the values in the reduction dimensions does not affect the -results, but the code may execute faster if you list the dimensions in -increasing order. - -Example: Reduction along one dimension. - - // Create a tensor of 2 dimensions - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{1, 2, 3}, {6, 5, 4}}); - // Reduce it along the second dimension (1)... - Eigen::array<int, 1> dims({1 /* dimension to reduce */}); - // ...using the "maximum" operator. - // The result is a tensor with one dimension. The size of - // that dimension is the same as the first (non-reduced) dimension of a. - Eigen::Tensor<int, 1> b = a.maximum(dims); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 1 2 3 - 6 5 4 - - b - 3 - 6 - -Example: Reduction along two dimensions. - - Eigen::Tensor<float, 3, Eigen::ColMajor> a(2, 3, 4); - a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, - {7.0f, 6.0f, 5.0f, 4.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}, - {{12.0f, 13.0f, 14.0f, 15.0f}, - {19.0f, 18.0f, 17.0f, 16.0f}, - {20.0f, 21.0f, 22.0f, 23.0f}}}); - // The tensor a has 3 dimensions. We reduce along the - // first 2, resulting in a tensor with a single dimension - // of size 4 (the last dimension of a.) - // Note that we pass the array of reduction dimensions - // directly to the maximum() call. - Eigen::Tensor<float, 1, Eigen::ColMajor> b = - a.maximum(Eigen::array<int, 2>({0, 1})); - cout << "b" << endl << b << endl << endl; - => - b - 20 - 21 - 22 - 23 - -#### Reduction along all dimensions - -As a special case, if you pass no parameter to a reduction operation the -original tensor is reduced along *all* its dimensions. The result is a -scalar, represented as a zero-dimension tensor. - - Eigen::Tensor<float, 3> a(2, 3, 4); - a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, - {7.0f, 6.0f, 5.0f, 4.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}, - {{12.0f, 13.0f, 14.0f, 15.0f}, - {19.0f, 18.0f, 17.0f, 16.0f}, - {20.0f, 21.0f, 22.0f, 23.0f}}}); - // Reduce along all dimensions using the sum() operator. 
-    Eigen::Tensor<float, 0> b = a.sum();
-    cout << "b" << endl << b << endl << endl;
-    =>
-    b
-    276
-
-
-### `<Operation> sum(const Dimensions& new_dims)`
-### `<Operation> sum()`
-
-Reduce a tensor using the sum() operator. The resulting values
-are the sum of the reduced values.
-
-### `<Operation> mean(const Dimensions& new_dims)`
-### `<Operation> mean()`
-
-Reduce a tensor using the mean() operator. The resulting values
-are the mean of the reduced values.
-
-### `<Operation> maximum(const Dimensions& new_dims)`
-### `<Operation> maximum()`
-
-Reduce a tensor using the maximum() operator. The resulting values are the
-largest of the reduced values.
-
-### `<Operation> minimum(const Dimensions& new_dims)`
-### `<Operation> minimum()`
-
-Reduce a tensor using the minimum() operator. The resulting values
-are the smallest of the reduced values.
-
-### `<Operation> prod(const Dimensions& new_dims)`
-### `<Operation> prod()`
-
-Reduce a tensor using the prod() operator. The resulting values
-are the product of the reduced values.
-
-### `<Operation> all(const Dimensions& new_dims)`
-### `<Operation> all()`
-Reduce a tensor using the all() operator. Casts tensor to bool and then checks
-whether all elements are true. Runs through all elements rather than
-short-circuiting, so may be significantly inefficient.
-
-### `<Operation> any(const Dimensions& new_dims)`
-### `<Operation> any()`
-Reduce a tensor using the any() operator. Casts tensor to bool and then checks
-whether any element is true. Runs through all elements rather than
-short-circuiting, so may be significantly inefficient.
-
-
-### `<Operation> reduce(const Dimensions& new_dims, const Reducer& reducer)`
-
-Reduce a tensor using a user-defined reduction operator. See `SumReducer`
-in TensorFunctors.h for information on how to implement a reduction operator.
-
-
-## Scan Operations
-
-A *Scan* operation returns a tensor with the same dimensions as the original
-tensor. The operation performs an inclusive scan along the specified
-axis, which means it computes a running total along the axis for a given
-reduction operation.
-If the reduction operation corresponds to summation, then this computes the
-prefix sum of the tensor along the given axis.
-
-Example:
-
-    // Create a tensor of 2 dimensions
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{1, 2, 3}, {4, 5, 6}});
-    // Scan it along the second dimension (1) using summation
-    Eigen::Tensor<int, 2> b = a.cumsum(1);
-    // The result is a tensor with the same size as the input
-    cout << "a" << endl << a << endl << endl;
-    cout << "b" << endl << b << endl << endl;
-    =>
-    a
-    1 2 3
-    4 5 6
-
-    b
-    1 3 6
-    4 9 15
-
-### `<Operation> cumsum(const Index& axis)`
-
-Perform a scan by summing consecutive entries.
-
-### `<Operation> cumprod(const Index& axis)`
-
-Perform a scan by multiplying consecutive entries.
-
-
-## Convolutions
-
-### `<Operation> convolve(const Kernel& kernel, const Dimensions& dims)`
-
-Returns a tensor that is the output of the convolution of the input tensor with the kernel,
-along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor
-which were part of the convolution will be reduced by the formula:
-output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size).
-The dimension sizes for dimensions that were not part of the convolution will remain the same.
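-For instance, in the example below a 3x3x7x11 input convolved with a 2x2 kernel
-along dimensions 1 and 2 yields a 3x2x6x11 output, since 3 - 2 + 1 = 2 and
-7 - 2 + 1 = 6.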
-Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the -convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is -for the last dimension). - - // Compute convolution along the second and third dimension. - Tensor<float, 4, DataLayout> input(3, 3, 7, 11); - Tensor<float, 2, DataLayout> kernel(2, 2); - Tensor<float, 4, DataLayout> output(3, 2, 6, 11); - input.setRandom(); - kernel.setRandom(); - - Eigen::array<ptrdiff_t, 2> dims({1, 2}); // Specify second and third dimension for convolution. - output = input.convolve(kernel, dims); - - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < 2; ++j) { - for (int k = 0; k < 6; ++k) { - for (int l = 0; l < 11; ++l) { - const float result = output(i,j,k,l); - const float expected = input(i,j+0,k+0,l) * kernel(0,0) + - input(i,j+1,k+0,l) * kernel(1,0) + - input(i,j+0,k+1,l) * kernel(0,1) + - input(i,j+1,k+1,l) * kernel(1,1); - VERIFY_IS_APPROX(result, expected); - } - } - } - } - - -## Geometrical Operations - -These operations return a Tensor with different dimensions than the original -Tensor. They can be used to access slices of tensors, see them with different -dimensions, or pad tensors with additional data. - -### `<Operation> reshape(const Dimensions& new_dims)` - -Returns a view of the input tensor that has been reshaped to the specified -new dimensions. The argument new_dims is an array of Index values. The -rank of the resulting tensor is equal to the number of elements in new_dims. - -The product of all the sizes in the new dimension array must be equal to -the number of elements in the input tensor. - - // Increase the rank of the input tensor by introducing a new dimension - // of size 1. - Tensor<float, 2> input(7, 11); - array<int, 3> three_dims{{7, 11, 1}}; - Tensor<float, 3> result = input.reshape(three_dims); - - // Decrease the rank of the input tensor by merging 2 dimensions; - array<int, 1> one_dim{{7 * 11}}; - Tensor<float, 1> result = input.reshape(one_dim); - -This operation does not move any data in the input tensor, so the resulting -contents of a reshaped Tensor depend on the data layout of the original Tensor. - -For example this is what happens when you `reshape()` a 2D ColMajor tensor -to one dimension: - - Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2}); - Eigen::Tensor<float, 1, Eigen::ColMajor> b = a.reshape(one_dim); - cout << "b" << endl << b << endl; - => - b - 0 - 300 - 100 - 400 - 200 - 500 - -This is what happens when the 2D Tensor is RowMajor: - - Eigen::Tensor<float, 2, Eigen::RowMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2}); - Eigen::Tensor<float, 1, Eigen::RowMajor> b = a.reshape(one_dim); - cout << "b" << endl << b << endl; - => - b - 0 - 100 - 200 - 300 - 400 - 500 - -The reshape operation is a lvalue. In other words, it can be used on the left -side of the assignment operator. 
- -The previous example can be rewritten as follow: - - Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3); - a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); - Eigen::array<Eigen::DenseIndex, 2> two_dim({2, 3}); - Eigen::Tensor<float, 1, Eigen::ColMajor> b(6); - b.reshape(two_dim) = a; - cout << "b" << endl << b << endl; - => - b - 0 - 300 - 100 - 400 - 200 - 500 - -Note that "b" itself was not reshaped but that instead the assignment is done to -the reshape view of b. - - -### `<Operation> shuffle(const Shuffle& shuffle)` - -Returns a copy of the input tensor whose dimensions have been -reordered according to the specified permutation. The argument shuffle -is an array of Index values. Its size is the rank of the input -tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th -dimension of the output tensor equals to the size of the shuffle[i]-th -dimension of the input tensor. For example: - - // Shuffle all dimensions to the left by 1. - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. - Tensor<float, 3> output = input.shuffle({1, 2, 0}) - - eigen_assert(output.dimension(0) == 30); - eigen_assert(output.dimension(1) == 50); - eigen_assert(output.dimension(2) == 20); - -Indices into the output tensor are shuffled accordingly to formulate -indices into the input tensor. For example, one can assert in the above -code snippet that: - - eigen_assert(output(3, 7, 11) == input(11, 3, 7)); - -In general, one can assert that - - eigen_assert(output(..., indices[shuffle[i]], ...) == - input(..., indices[i], ...)) - -The shuffle operation results in a lvalue, which means that it can be assigned -to. In other words, it can be used on the left side of the assignment operator. - -Let's rewrite the previous example to take advantage of this feature: - - // Shuffle all dimensions to the left by 1. - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. - Tensor<float, 3> output(30, 50, 20); - output.shuffle({2, 0, 1}) = input; - - -### `<Operation> stride(const Strides& strides)` - -Returns a view of the input tensor that strides (skips stride-1 -elements) along each of the dimensions. The argument strides is an -array of Index values. The dimensions of the resulting tensor are -ceil(input_dimensions[i] / strides[i]). - -For example this is what happens when you `stride()` a 2D tensor: - - Eigen::Tensor<int, 2> a(4, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}}); - Eigen::array<Eigen::DenseIndex, 2> strides({3, 2}); - Eigen::Tensor<int, 2> b = a.stride(strides); - cout << "b" << endl << b << endl; - => - b - 0 200 - 900 1100 - -It is possible to assign a tensor to a stride: - Tensor<float, 3> input(20, 30, 50); - // ... set some values in input. - Tensor<float, 3> output(40, 90, 200); - output.stride({2, 3, 4}) = input; - - -### `<Operation> slice(const StartIndices& offsets, const Sizes& extents)` - -Returns a sub-tensor of the given tensor. For each dimension i, the slice is -made of the coefficients stored between offset[i] and offset[i] + extents[i] in -the input tensor. 
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                 {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::array<int, 2> offsets = {1, 0};
-    Eigen::array<int, 2> extents = {2, 2};
-    Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
-    cout << "a" << endl << a << endl;
-    =>
-    a
-    0 100 200
-    300 400 500
-    600 700 800
-    900 1000 1100
-    cout << "slice" << endl << slice << endl;
-    =>
-    slice
-    300 400
-    600 700
-
-
-### `<Operation> chip(const Index offset, const Index dim)`
-
-A chip is a special kind of slice. It is the subtensor at the given offset in
-the dimension dim. The returned tensor has one fewer dimension than the input
-tensor: the dimension dim is removed.
-
-For example, a matrix chip would be either a row or a column of the input
-matrix.
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                 {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::Tensor<int, 1> row_3 = a.chip(2, 0);
-    Eigen::Tensor<int, 1> col_2 = a.chip(1, 1);
-    cout << "a" << endl << a << endl;
-    =>
-    a
-    0 100 200
-    300 400 500
-    600 700 800
-    900 1000 1100
-    cout << "row_3" << endl << row_3 << endl;
-    =>
-    row_3
-    600 700 800
-    cout << "col_2" << endl << col_2 << endl;
-    =>
-    col_2
-    100 400 700 1000
-
-It is possible to assign values to a tensor chip since the chip operation is an
-lvalue. For example:
-
-    Eigen::Tensor<int, 1> a(3);
-    a.setValues({100, 200, 300});
-    Eigen::Tensor<int, 2> b(2, 3);
-    b.setZero();
-    b.chip(0, 0) = a;
-    cout << "a" << endl << a << endl;
-    =>
-    a
-    100
-    200
-    300
-    cout << "b" << endl << b << endl;
-    =>
-    b
-    100 200 300
-    0 0 0
-
-
-### `<Operation> reverse(const ReverseDimensions& reverse)`
-
-Returns a view of the input tensor that reverses the order of the coefficients
-along a subset of the dimensions. The argument reverse is an array of boolean
-values that indicates whether or not the order of the coefficients should be
-reversed along each of the dimensions. This operation preserves the dimensions
-of the input tensor.
-
-For example this is what happens when you `reverse()` the first dimension
-of a 2D tensor:
-
-    Eigen::Tensor<int, 2> a(4, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500},
-                 {600, 700, 800}, {900, 1000, 1100}});
-    Eigen::array<bool, 2> reverse({true, false});
-    Eigen::Tensor<int, 2> b = a.reverse(reverse);
-    cout << "a" << endl << a << endl << "b" << endl << b << endl;
-    =>
-    a
-    0 100 200
-    300 400 500
-    600 700 800
-    900 1000 1100
-    b
-    900 1000 1100
-    600 700 800
-    300 400 500
-    0 100 200
-
-
-### `<Operation> broadcast(const Broadcast& broadcast)`
-
-Returns a view of the input tensor in which the input is replicated one or more
-times.
-The broadcast argument specifies how many copies of the input tensor need to be
-made in each of the dimensions.
-
-    Eigen::Tensor<int, 2> a(2, 3);
-    a.setValues({{0, 100, 200}, {300, 400, 500}});
-    Eigen::array<int, 2> bcast({3, 2});
-    Eigen::Tensor<int, 2> b = a.broadcast(bcast);
-    cout << "a" << endl << a << endl << "b" << endl << b << endl;
-    =>
-    a
-    0 100 200
-    300 400 500
-    b
-    0 100 200 0 100 200
-    300 400 500 300 400 500
-    0 100 200 0 100 200
-    300 400 500 300 400 500
-    0 100 200 0 100 200
-    300 400 500 300 400 500
-
-### `<Operation> concatenate(const OtherDerived& other, Axis axis)`
-
-TODO
-
-### `<Operation> pad(const PaddingDimensions& padding)`
-
-Returns a view of the input tensor in which the input is padded with zeros.
- - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 100, 200}, {300, 400, 500}}); - Eigen::array<pair<int, int>, 2> paddings; - paddings[0] = make_pair(0, 1); - paddings[1] = make_pair(2, 3); - Eigen::Tensor<int, 2> b = a.pad(paddings); - cout << "a" << endl << a << endl << "b" << endl << b << endl; - => - a - 0 100 200 - 300 400 500 - b - 0 0 0 0 - 0 0 0 0 - 0 100 200 0 - 300 400 500 0 - 0 0 0 0 - 0 0 0 0 - 0 0 0 0 - - -### `<Operation> extract_patches(const PatchDims& patch_dims)` - -Returns a tensor of coefficient patches extracted from the input tensor, where -each patch is of dimension specified by 'patch_dims'. The returned tensor has -one greater dimension than the input tensor, which is used to index each patch. -The patch index in the output tensor depends on the data layout of the input -tensor: the patch index is the last dimension ColMajor layout, and the first -dimension in RowMajor layout. - -For example, given the following input tensor: - - Eigen::Tensor<float, 2, DataLayout> tensor(3,4); - tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f}, - {4.0f, 5.0f, 6.0f, 7.0f}, - {8.0f, 9.0f, 10.0f, 11.0f}}); - - cout << "tensor: " << endl << tensor << endl; -=> -tensor: - 0 1 2 3 - 4 5 6 7 - 8 9 10 11 - -Six 2x2 patches can be extracted and indexed using the following code: - - Eigen::Tensor<float, 3, DataLayout> patch; - Eigen::array<ptrdiff_t, 2> patch_dims; - patch_dims[0] = 2; - patch_dims[1] = 2; - patch = tensor.extract_patches(patch_dims); - for (int k = 0; k < 6; ++k) { - cout << "patch index: " << k << endl; - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 2; ++j) { - if (DataLayout == ColMajor) { - cout << patch(i, j, k) << " "; - } else { - cout << patch(k, i, j) << " "; - } - } - cout << endl; - } - } - -This code results in the following output when the data layout is ColMajor: - -patch index: 0 -0 1 -4 5 -patch index: 1 -4 5 -8 9 -patch index: 2 -1 2 -5 6 -patch index: 3 -5 6 -9 10 -patch index: 4 -2 3 -6 7 -patch index: 5 -6 7 -10 11 - -This code results in the following output when the data layout is RowMajor: -(NOTE: the set of patches is the same as in ColMajor, but are indexed differently). - -patch index: 0 -0 1 -4 5 -patch index: 1 -1 2 -5 6 -patch index: 2 -2 3 -6 7 -patch index: 3 -4 5 -8 9 -patch index: 4 -5 6 -9 10 -patch index: 5 -6 7 -10 11 - -### `<Operation> extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const PaddingType padding_type)` - -Returns a tensor of coefficient image patches extracted from the input tensor, -which is expected to have dimensions ordered as follows (depending on the data -layout of the input tensor, and the number of additional dimensions 'N'): - -*) ColMajor -1st dimension: channels (of size d) -2nd dimension: rows (of size r) -3rd dimension: columns (of size c) -4th-Nth dimension: time (for video) or batch (for bulk processing). - -*) RowMajor (reverse order of ColMajor) -1st-Nth dimension: time (for video) or batch (for bulk processing). -N+1'th dimension: columns (of size c) -N+2'th dimension: rows (of size r) -N+3'th dimension: channels (of size d) - -The returned tensor has one greater dimension than the input tensor, which is -used to index each patch. The patch index in the output tensor depends on the -data layout of the input tensor: the patch index is the 4'th dimension in -ColMajor layout, and the 4'th from the last dimension in RowMajor layout. 
- -For example, given the following input tensor with the following dimension -sizes: - *) depth: 2 - *) rows: 3 - *) columns: 5 - *) batch: 7 - - Tensor<float, 4> tensor(2,3,5,7); - Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); - -2x2 image patches can be extracted and indexed using the following code: - -*) 2D patch: ColMajor (patch indexed by second-to-last dimension) - Tensor<float, 5> twod_patch; - twod_patch = tensor.extract_image_patches<2, 2>(); - // twod_patch.dimension(0) == 2 - // twod_patch.dimension(1) == 2 - // twod_patch.dimension(2) == 2 - // twod_patch.dimension(3) == 3*5 - // twod_patch.dimension(4) == 7 - -*) 2D patch: RowMajor (patch indexed by the second dimension) - Tensor<float, 5, RowMajor> twod_patch_row_major; - twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>(); - // twod_patch_row_major.dimension(0) == 7 - // twod_patch_row_major.dimension(1) == 3*5 - // twod_patch_row_major.dimension(2) == 2 - // twod_patch_row_major.dimension(3) == 2 - // twod_patch_row_major.dimension(4) == 2 - -## Special Operations - -### `<Operation> cast<T>()` - -Returns a tensor of type T with the same dimensions as the original tensor. -The returned tensor contains the values of the original tensor converted to -type T. - - Eigen::Tensor<float, 2> a(2, 3); - Eigen::Tensor<int, 2> b = a.cast<int>(); - -This can be useful for example if you need to do element-wise division of -Tensors of integers. This is not currently supported by the Tensor library -but you can easily cast the tensors to floats to do the division: - - Eigen::Tensor<int, 2> a(2, 3); - a.setValues({{0, 1, 2}, {3, 4, 5}}); - Eigen::Tensor<int, 2> b = - (a.cast<float>() / a.constant(2).cast<float>()).cast<int>(); - cout << "a" << endl << a << endl << endl; - cout << "b" << endl << b << endl << endl; - => - a - 0 1 2 - 3 4 5 - - b - 0 0 1 - 1 2 2 - - -### `<Operation> eval()` - -TODO - - -## Representation of scalar values - -Scalar values are often represented by tensors of size 1 and rank 0.For example -Tensor<T, N>::maximum() currently returns a Tensor<T, 0>. Similarly, the inner -product of 2 1d tensors (through contractions) returns a 0d tensor. - -## Limitations - -* The number of tensor dimensions is currently limited to 250 when using a - compiler that supports cxx11. It is limited to only 5 for older compilers. -* The IndexList class requires a cxx11 compliant compiler. You can use an - array of indices instead if you don't have access to a modern compiler. -* On GPUs only floating point values are properly tested and optimized for. -* Complex and integer values are known to be broken on GPUs. If you try to use - them you'll most likely end up triggering a static assertion failure such as - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - - diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h deleted file mode 100644 index 00295a2..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ /dev/null @@ -1,527 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_H - -namespace Eigen { - -/** \class Tensor - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor class. - * - * The %Tensor class is the work-horse for all \em dense tensors within Eigen. - * - * The %Tensor class encompasses only dynamic-size objects so far. - * - * The first two template parameters are required: - * \tparam Scalar_ Numeric type, e.g. float, double, int or `std::complex<float>`. - * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). - * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) - * - * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either - * \b #AutoAlign or \b #DontAlign. - * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required - * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. - * Support for such operations (i.e. adding two tensors etc.) is planned. - * - * You can access elements of tensors using normal subscripting: - * - * \code - * Eigen::Tensor<double, 4> t(10, 10, 10, 10); - * t(0, 1, 2, 3) = 42.0; - * \endcode - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. - * - * <i><b>Some notes:</b></i> - * - * <dl> - * <dt><b>Relation to other parts of Eigen:</b></dt> - * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that - * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code - * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor - * class does not provide any of these features and is only available as a stand-alone class that just allows for - * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to - * change dramatically.</dd> - * </dl> - * - * \ref TopicStorageOrders - */ - -template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_> -class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - public: - typedef Tensor<Scalar_, NumIndices_, Options_, IndexType_> Self; - typedef TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<Self>::StorageKind StorageKind; - typedef typename internal::traits<Self>::Index Index; - typedef Scalar_ Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - - enum { - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), - Layout = Options_ & RowMajor ? 
RowMajor : ColMajor, - CoordAccess = true, - RawAccess = true - }; - - static const int Options = Options_; - static const int NumIndices = NumIndices_; - typedef DSizes<Index, NumIndices_> Dimensions; - - protected: - TensorStorage<Scalar, Dimensions, Options> m_storage; - -#ifdef EIGEN_HAS_SFINAE - template<typename CustomIndices> - struct isOfNormalIndex{ - static const bool is_array = internal::is_base_of<array<Index, NumIndices>, CustomIndices>::value; - static const bool is_int = NumTraits<CustomIndices>::IsInteger; - static const bool value = is_array | is_int; - }; -#endif - - public: - // Metadata - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } - - // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - // work, because that uses base().coeffRef() - and we don't yet - // implement a similar class hierarchy - inline Self& base() { return *this; } - inline const Self& base() const { return *this; } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - // normal indices - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - // custom indices -#ifdef EIGEN_HAS_SFINAE - template<typename CustomIndices, - EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) - > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const - { - return coeff(internal::customIndices2Array<Index,NumIndices>(indices)); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#endif - - // normal indices - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - // custom indices -#ifdef EIGEN_HAS_SFINAE - template<typename CustomIndices, - EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) - > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) - { - return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices)); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const - { - return coeff(array<Index, 2>(i0, i1)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const - { - return coeff(array<Index, 3>(i0, i1, i2)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const - { - return coeff(array<Index, 4>(i0, i1, i2, i3)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - return coeff(array<Index, 5>(i0, i1, i2, i3, i4)); - } -#endif - - // custom indices -#ifdef EIGEN_HAS_SFINAE - template<typename CustomIndices, - EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) - > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const - { - return coeff(internal::customIndices2Array<Index,NumIndices>(indices)); - } -#endif - - // normal indices - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - return coeff(indices); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return coeff(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const - { - // The bracket operator is only for vectors, use the parenthesis operator instead. - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(index); - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) - { - return coeffRef(array<Index, 2>(i0, i1)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) - { - return coeffRef(array<Index, 3>(i0, i1, i2)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - return coeffRef(array<Index, 4>(i0, i1, i2, i3)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) - { - return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4)); - } -#endif - - // normal indices - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - return coeffRef(indices); - } - - // custom indices -#ifdef EIGEN_HAS_SFINAE - template<typename CustomIndices, - EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) ) - > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) - { - return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices)); - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < size()); - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeffRef(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) - { - // The bracket operator is only for vectors, use the parenthesis operator instead - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor() - : m_storage() - { - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const Self& other) - : m_storage(other.m_storage) - { - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) - : m_storage(firstDimension, otherDimensions...) - { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#else - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) - : m_storage(dim1, array<Index, 1>(dim1)) - { - EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2) - : m_storage(dim1*dim2, array<Index, 2>(dim1, dim2)) - { - EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) - : m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3)) - { - EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) - : m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4)) - { - EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) - : m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5)) - { - EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#endif - - /** Normal Dimension */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions) - : m_storage(internal::array_prod(dimensions), dimensions) - { - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) - { - typedef TensorAssignOp<Tensor, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other) - { - typedef TensorAssignOp<Tensor, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) - { - typedef TensorAssignOp<Tensor, const Tensor> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other) - { - typedef TensorAssignOp<Tensor, const OtherDerived> Assign; - Assign assign(*this, other); - resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - void resize(Index firstDimension, IndexTypes... otherDimensions) - { - // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}}); - } -#endif - - /** Normal Dimension */ - EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions) - { - int i; - Index size = Index(1); - for (i = 0; i < NumIndices; i++) { - internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]); - size *= dimensions[i]; - } - #ifdef EIGEN_INITIALIZE_COEFFS - bool size_changed = size != this->size(); - m_storage.resize(size, dimensions); - if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - #else - m_storage.resize(size, dimensions); - #endif - } - - // Why this overload, DSizes is derived from array ??? // - EIGEN_DEVICE_FUNC void resize(const DSizes<Index, NumIndices>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = dimensions[i]; - } - resize(dims); - } - - EIGEN_DEVICE_FUNC - void resize() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - // Nothing to do: rank 0 tensors have fixed size - } - - /** Custom Dimension */ -#ifdef EIGEN_HAS_SFINAE - template<typename CustomDimension, - EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomDimension>::value) ) - > - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) - { - resize(internal::customIndices2Array<Index,NumIndices>(dimensions)); - } -#endif - -#ifndef EIGEN_EMULATE_CXX11_META_H - template <typename std::ptrdiff_t... Indices> - EIGEN_DEVICE_FUNC - void resize(const Sizes<Indices...>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = static_cast<Index>(dimensions[i]); - } - resize(dims); - } -#else - template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> - EIGEN_DEVICE_FUNC - void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) { - array<Index, NumIndices> dims; - for (int i = 0; i < NumIndices; ++i) { - dims[i] = static_cast<Index>(dimensions[i]); - } - resize(dims); - } -#endif - - protected: - - bool checkIndexRange(const array<Index, NumIndices>& indices) const - { - using internal::array_apply_and_reduce; - using internal::array_zip_and_reduce; - using internal::greater_equal_zero_op; - using internal::logical_and_op; - using internal::lesser_op; - - return - // check whether the indices are all >= 0 - array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && - // check whether the indices fit in the dimensions - array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const - { - if (Options&RowMajor) { - return m_storage.dimensions().IndexOfRowMajor(indices); - } else { - return m_storage.dimensions().IndexOfColMajor(indices); - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h deleted file mode 100644 index d06f40c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h +++ /dev/null @@ -1,299 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H -#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H - -namespace Eigen { -namespace internal { - -/** \class TensorIndexTuple - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor + Index Tuple class. - * - * - */ -template<typename XprType> -struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType> -{ - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef Tuple<Index, typename XprTraits::Scalar> Scalar; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename XprType> -struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense> -{ - typedef const TensorIndexTupleOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorIndexTupleOp<XprType>, 1, - typename eval<TensorIndexTupleOp<XprType> >::type> -{ - typedef TensorIndexTupleOp<XprType> type; -}; - -} // end namespace internal - -template<typename XprType> -class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested; - typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index; - typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) - : m_xpr(expr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; -}; - -// Eval as rvalue -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> -{ - typedef TensorIndexTupleOp<ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - static const int NumDims = internal::array_size<Dimensions>::value; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_impl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - 
m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return CoeffReturnType(index, m_impl.coeff(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; -}; - -namespace internal { - -/** \class TensorTupleIndex - * \ingroup CXX11_Tensor_Module - * - * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>. - * - */ -template<typename ReduceOp, typename Dims, typename XprType> -struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType> -{ - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef Index Scalar; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename ReduceOp, typename Dims, typename XprType> -struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense> -{ - typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type; -}; - -template<typename ReduceOp, typename Dims, typename XprType> -struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1, - typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type> -{ - typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type; -}; - -} // end namespace internal - -template<typename ReduceOp, typename Dims, typename XprType> -class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested; - typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index; - typedef Index CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, - const ReduceOp& reduce_op, - const int return_dim, - const Dims& reduce_dims) - : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - const ReduceOp& reduce_op() const { return m_reduce_op; } - - EIGEN_DEVICE_FUNC - const Dims& reduce_dims() const { return m_reduce_dims; } - - EIGEN_DEVICE_FUNC - int return_dim() const { return m_return_dim; } - - protected: - typename XprType::Nested m_xpr; - const ReduceOp m_reduce_op; - const int m_return_dim; - const Dims m_reduce_dims; -}; - -// Eval as rvalue -template<typename ReduceOp, typename Dims, typename ArgType, typename Device> -struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device> -{ - typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType 
TupleType; - typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions; - typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions; - static const int NumDims = internal::array_size<InputDimensions>::value; - typedef array<Index, NumDims> StrideDims; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false, - BlockAccess = false, - Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_orig_impl(op.expression(), device), - m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), - m_return_dim(op.return_dim()) { - - gen_strides(m_orig_impl.dimensions(), m_strides); - if (Layout == static_cast<int>(ColMajor)) { - const Index total_size = internal::array_prod(m_orig_impl.dimensions()); - m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; - } else { - const Index total_size = internal::array_prod(m_orig_impl.dimensions()); - m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; - } - m_stride_div = m_strides[m_return_dim]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_impl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - const TupleType v = m_impl.coeff(index); - return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double compute_cost = 1.0 + - (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost<Index>() + TensorOpCost::DivCost<Index>())); - return m_orig_impl.costPerCoeff(vectorized) + - m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); - } - - private: - EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { - if (m_return_dim < 0) { - return; // Won't be using the strides. - } - eigen_assert(m_return_dim < NumDims && - "Asking to convert index to a dimension outside of the rank"); - - // Calculate m_stride_div and m_stride_mod, which are used to - // calculate the value of an index w.r.t. the m_return_dim. 
- if (Layout == static_cast<int>(ColMajor)) { - strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - strides[i] = strides[i-1] * dims[i-1]; - } - } else { - strides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - strides[i] = strides[i+1] * dims[i+1]; - } - } - } - - protected: - TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl; - TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl; - const int m_return_dim; - StrideDims m_strides; - Index m_stride_mod; - Index m_stride_div; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h deleted file mode 100644 index 166be20..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ /dev/null @@ -1,181 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H -#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H - -namespace Eigen { - -/** \class TensorAssign - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor assignment class. - * - * This class is represents the assignment of the values resulting from the evaluation of - * the rhs expression to the memory locations denoted by the lhs expression. - */ -namespace internal { -template<typename LhsXprType, typename RhsXprType> -struct traits<TensorAssignOp<LhsXprType, RhsXprType> > -{ - typedef typename LhsXprType::Scalar Scalar; - typedef typename traits<LhsXprType>::StorageKind StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions; - static const int Layout = internal::traits<LhsXprType>::Layout; - - enum { - Flags = 0 - }; -}; - -template<typename LhsXprType, typename RhsXprType> -struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorAssignOp<LhsXprType, RhsXprType>& type; -}; - -template<typename LhsXprType, typename RhsXprType> -struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type> -{ - typedef TensorAssignOp<LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename LhsXprType, typename RhsXprType> -class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename LhsXprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorAssignOp>::type Nested; - typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index; - - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr; - const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr; -}; - - -template<typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> -{ - typedef TensorAssignOp<LeftArgType, RightArgType> XprType; - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - m_leftImpl(op.lhsExpression(), device), - m_rightImpl(op.rhsExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // The dimensions of the lhs and the rhs tensors should be equal to prevent - // overflows and ensure the result is fully initialized. - // TODO: use left impl instead if right impl dimensions are known at compile time. - return m_rightImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); - m_leftImpl.evalSubExprsIfNeeded(NULL); - // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non - // null value), attempt to evaluate the rhs expression in place. Returns true iff in place - // evaluation isn't supported and the caller still needs to manually assign the values generated - // by the rhs to the lhs. - return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { - m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { - const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned; - const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? 
Aligned : Unaligned; - m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i)); - } - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_leftImpl.coeff(index); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const - { - return m_leftImpl.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - // We assume that evalPacket or evalScalar is called to perform the - // assignment and account for the cost of the write here, but reduce left - // cost by one load because we are using m_leftImpl.coeffRef. - TensorOpCost left = m_leftImpl.costPerCoeff(vectorized); - return m_rightImpl.costPerCoeff(vectorized) + - TensorOpCost( - numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)), - left.bytes_stored(), left.compute_cycles()) + - TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); - } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } - - private: - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; -}; - -} - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h deleted file mode 100644 index f573608..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ /dev/null @@ -1,1012 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H -#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H - -// clang-format off - -namespace Eigen { - -/** \class TensorBase - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor base class. - * - * This class is the common parent of the Tensor and TensorMap class, thus - * making it possible to use either class interchangably in expressions. - */ -#ifndef EIGEN_PARSED_BY_DOXYGEN -// FIXME Doxygen does not like the inheritance with different template parameters -// Since there is no doxygen documentation inside, we disable it for now -template<typename Derived> -class TensorBase<Derived, ReadOnlyAccessors> -{ - public: - typedef internal::traits<Derived> DerivedTraits; - typedef typename DerivedTraits::Scalar Scalar; - typedef typename DerivedTraits::Index Index; - typedef typename internal::remove_const<Scalar>::type CoeffReturnType; - static const int NumDimensions = DerivedTraits::NumDimensions; - - // Generic nullary operation support. 
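A minimal sketch of how the nullary helpers declared just below combine with the assignment path documented in TensorAssign.h above; only the unsupported Tensor header is assumed, and shapes and values are arbitrary:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
    a.setRandom();                  // fill 'a' in place (writable TensorBase helper)
    b = a + a.constant(1.0f);       // rhs expression; operator= routes through TensorAssignOp
    return 0;
  }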
- template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<CustomNullaryOp, const Derived> - nullaryExpr(const CustomNullaryOp& func) const { - return TensorCwiseNullaryOp<CustomNullaryOp, const Derived>(derived(), func); - } - - // Coefficient-wise nullary operators - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> - constant(const Scalar& value) const { - return nullaryExpr(internal::scalar_constant_op<Scalar>(value)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::UniformRandomGenerator<Scalar>, const Derived> - random() const { - return nullaryExpr(internal::UniformRandomGenerator<Scalar>()); - } - template <typename RandomGenerator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<RandomGenerator, const Derived> - random(const RandomGenerator& gen = RandomGenerator()) const { - return nullaryExpr(gen); - } - - // Tensor generation - template <typename Generator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorGeneratorOp<Generator, const Derived> - generate(const Generator& generator) const { - return TensorGeneratorOp<Generator, const Derived>(derived(), generator); - } - - // Generic unary operation support. - template <typename CustomUnaryOp> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<CustomUnaryOp, const Derived> - unaryExpr(const CustomUnaryOp& func) const { - return TensorCwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func); - } - - // Coefficient-wise unary operators - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> - operator-() const { - return unaryExpr(internal::scalar_opposite_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> - sqrt() const { - return unaryExpr(internal::scalar_sqrt_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> - sign() const { - return unaryExpr(internal::scalar_sign_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> - rsqrt() const { - return unaryExpr(internal::scalar_rsqrt_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> - square() const { - return unaryExpr(internal::scalar_square_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> - cube() const { - return unaryExpr(internal::scalar_cube_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> - inverse() const { - return unaryExpr(internal::scalar_inverse_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> - tanh() const { - return unaryExpr(internal::scalar_tanh_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> - lgamma() const { - return unaryExpr(internal::scalar_lgamma_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> - digamma() const { - return 
unaryExpr(internal::scalar_digamma_op<Scalar>()); - } - - // igamma(a = this, x = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived> - igamma(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>()); - } - - // igammac(a = this, x = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived> - igammac(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>()); - } - - // zeta(x = this, q = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const OtherDerived> - zeta(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_zeta_op<Scalar>()); - } - - // polygamma(n = this, x = other) - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const Derived, const OtherDerived> - polygamma(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_polygamma_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> - erf() const { - return unaryExpr(internal::scalar_erf_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> - erfc() const { - return unaryExpr(internal::scalar_erfc_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> - sigmoid() const { - return unaryExpr(internal::scalar_sigmoid_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> - exp() const { - return unaryExpr(internal::scalar_exp_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> - log() const { - return unaryExpr(internal::scalar_log_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> - log1p() const { - return unaryExpr(internal::scalar_log1p_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> - abs() const { - return unaryExpr(internal::scalar_abs_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived> - conjugate() const { - return unaryExpr(internal::scalar_conjugate_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived> - pow(Scalar exponent) const { - return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived> - real() const { - return unaryExpr(internal::scalar_real_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> - imag() 
const { - return unaryExpr(internal::scalar_imag_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >, const Derived> - operator+ (Scalar rhs) const { - return unaryExpr(internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_sum_op<Scalar> >, const Derived> - operator+ (Scalar lhs, const Derived& rhs) { - return rhs.unaryExpr(internal::bind1st_op<internal::scalar_sum_op<Scalar> >(lhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >, const Derived> - operator- (Scalar rhs) const { - EIGEN_STATIC_ASSERT((NumTraits<Scalar>::IsSigned || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_difference_op<Scalar> >, const Derived> - operator- (Scalar lhs, const Derived& rhs) { - return rhs.unaryExpr(internal::bind1st_op<internal::scalar_difference_op<Scalar> >(lhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >, const Derived> - operator* (Scalar rhs) const { - return unaryExpr(internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_product_op<Scalar> >, const Derived> - operator* (Scalar lhs, const Derived& rhs) { - return rhs.unaryExpr(internal::bind1st_op<internal::scalar_product_op<Scalar> >(lhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >, const Derived> - operator/ (Scalar rhs) const { - return unaryExpr(internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_quotient_op<Scalar> >, const Derived> - operator/ (Scalar lhs, const Derived& rhs) { - return rhs.unaryExpr(internal::bind1st_op<internal::scalar_quotient_op<Scalar> >(lhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_mod_op<Scalar>, const Derived> - operator% (Scalar rhs) const { - EIGEN_STATIC_ASSERT(NumTraits<Scalar>::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); - return unaryExpr(internal::scalar_mod_op<Scalar>(rhs)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - cwiseMax(Scalar threshold) const { - return cwiseMax(constant(threshold)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - cwiseMin(Scalar threshold) const { - return cwiseMin(constant(threshold)); - } - - template <typename NewType> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived> - cast() const { - return TensorConversionOp<NewType, 
const Derived>(derived()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> - round() const { - return unaryExpr(internal::scalar_round_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> - ceil() const { - return unaryExpr(internal::scalar_ceil_op<Scalar>()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> - floor() const { - return unaryExpr(internal::scalar_floor_op<Scalar>()); - } - - // Generic binary operation support. - template <typename CustomBinaryOp, typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived> - binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const { - return TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other, func); - } - - // Coefficient-wise binary operators. - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived> - operator+(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_sum_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived> - operator-(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_difference_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_product_op<Scalar>, const Derived, const OtherDerived> - operator*(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_product_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived> - operator/(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_quotient_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived> - cwiseMax(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_max_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived> - cwiseMin(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_min_op<Scalar>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived> - operator&&(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_and_op()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived> - operator||(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_or_op()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const 
TensorCwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived> - operator^(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_boolean_xor_op()); - } - - // Comparisons and tests. - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived> - operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived> - operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived> - operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>()); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived> - operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived> - operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>()); - } - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived> - operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>()); - } - - // comparisons and tests for Scalars - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator<(Scalar threshold) const { - return operator<(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator<=(Scalar threshold) const { - return operator<=(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator>(Scalar threshold) const { - return operator>(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator>=(Scalar threshold) const { - return 
operator>=(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator==(Scalar threshold) const { - return operator==(constant(threshold)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> > - operator!=(Scalar threshold) const { - return operator!=(constant(threshold)); - } - - // Checks - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> - (isnan)() const { - return unaryExpr(internal::scalar_isnan_op<Scalar>()); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> - (isinf)() const { - return unaryExpr(internal::scalar_isinf_op<Scalar>()); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> - (isfinite)() const { - return unaryExpr(internal::scalar_isfinite_op<Scalar>()); - } - - // Coefficient-wise ternary operators. - template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived> - select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { - return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived()); - } - - // Contractions. - typedef Eigen::IndexPair<Index> DimensionPair; - - template<typename OtherDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorContractionOp<const Dimensions, const Derived, const OtherDerived> - contract(const OtherDerived& other, const Dimensions& dims) const { - return TensorContractionOp<const Dimensions, const Derived, const OtherDerived>(derived(), other.derived(), dims); - } - - // Convolutions. - template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived> - convolve(const KernelDerived& kernel, const Dimensions& dims) const { - return TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims); - } - - // Fourier transforms - template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection> - fft(const FFT& fft) const { - return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft); - } - - // Scan. 
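A short sketch of the scan support declared just below; cumsum() and cumprod() default to inclusive scans along the given axis (exclusive = false), so for the values used here the running sums are 1, 3, 6, 10:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<int, 1> t(4);
    t.setValues({1, 2, 3, 4});
    Eigen::Tensor<int, 1> s = t.cumsum(0);   // inclusive running sum: 1, 3, 6, 10
    Eigen::Tensor<int, 1> p = t.cumprod(0);  // inclusive running product: 1, 2, 6, 24
    return 0;
  }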
- typedef TensorScanOp<internal::SumReducer<CoeffReturnType>, const Derived> TensorScanSumOp; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorScanSumOp - cumsum(const Index& axis, bool exclusive = false) const { - return TensorScanSumOp(derived(), axis, exclusive); - } - - typedef TensorScanOp<internal::ProdReducer<CoeffReturnType>, const Derived> TensorScanProdOp; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorScanProdOp - cumprod(const Index& axis, bool exclusive = false) const { - return TensorScanProdOp(derived(), axis, exclusive); - } - - template <typename Reducer> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorScanOp<Reducer, const Derived> - scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { - return TensorScanOp<Reducer, const Derived>(derived(), axis, exclusive, reducer); - } - - // Reductions. - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived> - sum(const Dims& dims) const { - return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - sum() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived> - mean(const Dims& dims) const { - return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - mean() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived> - prod(const Dims& dims) const { - return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - prod() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived> - maximum(const Dims& dims) const { - return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - maximum() const { - 
DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived> - minimum(const Dims& dims) const { - return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>()); - } - - const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived> - minimum() const { - DimensionList<Index, NumDimensions> in_dims; - return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::AndReducer, const Dims, const TensorConversionOp<bool, const Derived> > - all(const Dims& dims) const { - return cast<bool>().reduce(dims, internal::AndReducer()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> > - all() const { - DimensionList<Index, NumDimensions> in_dims; - return cast<bool>().reduce(in_dims, internal::AndReducer()); - } - - template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::OrReducer, const Dims, const TensorConversionOp<bool, const Derived> > - any(const Dims& dims) const { - return cast<bool>().reduce(dims, internal::OrReducer()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> > - any() const { - DimensionList<Index, NumDimensions> in_dims; - return cast<bool>().reduce(in_dims, internal::OrReducer()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, const Derived> - argmax() const { - array<Index, NumDimensions> in_dims; - for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; - return TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, - const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, const Derived> - argmin() const { - array<Index, NumDimensions> in_dims; - for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; - return TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, NumDimensions>, - const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, const Derived> - argmax(const int return_dim) const { - array<Index, 1> in_dims; - in_dims[0] = return_dim; - return TensorTupleReducerOp< - 
internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, - const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, const Derived> - argmin(const int return_dim) const { - array<Index, 1> in_dims; - in_dims[0] = return_dim; - return TensorTupleReducerOp< - internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >, - const array<Index, 1>, - const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims); - } - - template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp<Reducer, const Dims, const Derived> - reduce(const Dims& dims, const Reducer& reducer) const { - return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer); - } - - template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorBroadcastingOp<const Broadcast, const Derived> - broadcast(const Broadcast& broadcast) const { - return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast); - } - - template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConcatenationOp<Axis, const Derived, const OtherDerived> - concatenate(const OtherDerived& other, Axis axis) const { - return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis); - } - - template <typename PatchDims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPatchOp<const PatchDims, const Derived> - extract_patches(const PatchDims& patch_dims) const { - return TensorPatchOp<const PatchDims, const Derived>(derived(), patch_dims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows = 1, const Index patch_cols = 1, - const Index row_stride = 1, const Index col_stride = 1, - const Index in_row_stride = 1, const Index in_col_stride = 1, - const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, 1, 1, padding_type, padding_value); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorImagePatchOp<Dynamic, Dynamic, const Derived> - extract_image_patches(const Index patch_rows, const Index patch_cols, - const Index row_stride, const Index col_stride, - const Index in_row_stride, const Index in_col_stride, - const Index row_inflate_stride, const Index col_inflate_stride, - const Index padding_top, const Index padding_bottom, - const Index padding_left,const Index padding_right, - const Scalar padding_value) const { - return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride, - in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, - padding_top, padding_bottom, padding_left, padding_right, padding_value); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived> - extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, - const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, - const PaddingType 
padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { - return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); - } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived> - extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, - const Index plane_stride, const Index row_stride, const Index col_stride, - const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, - const Index padding_top_z, const Index padding_bottom_z, - const Index padding_top, const Index padding_bottom, - const Index padding_left, const Index padding_right, const Scalar padding_value = Scalar(0)) const { - return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); - } - - // Morphing operators. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorLayoutSwapOp<const Derived> - swap_layout() const { - return TensorLayoutSwapOp<const Derived>(derived()); - } - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReshapingOp<const NewDimensions, const Derived> - reshape(const NewDimensions& newDimensions) const { - return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions); - } - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSlicingOp<const StartIndices, const Sizes, const Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) const { - return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes); - } - template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, const Derived> - stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { - return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, - const Derived>(derived(), startIndices, stopIndices, strides); - } - template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<DimId, const Derived> - chip(const Index offset) const { - return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<Dynamic, const Derived> - chip(const Index offset, const Index dim) const { - return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim); - } - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReverseOp<const ReverseDimensions, const Derived> - reverse(const ReverseDimensions& rev) const { - return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev); - } - template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPaddingOp<const PaddingDimensions, const Derived> - pad(const PaddingDimensions& padding) const { - return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, 
internal::scalar_cast_op<int, Scalar>()(0)); - } - template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorPaddingOp<const PaddingDimensions, const Derived> - pad(const PaddingDimensions& padding, const Scalar padding_value) const { - return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, padding_value); - } - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingOp<const Strides, const Derived> - stride(const Strides& strides) const { - return TensorStridingOp<const Strides, const Derived>(derived(), strides); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorInflationOp<const Strides, const Derived> - inflate(const Strides& strides) const { - return TensorInflationOp<const Strides, const Derived>(derived(), strides); - } - - // Returns a tensor containing index/value tuples - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorIndexTupleOp<const Derived> - index_tuples() const { - return TensorIndexTupleOp<const Derived>(derived()); - } - - // Support for custom unary and binary operations - template <typename CustomUnaryFunc> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const { - return TensorCustomUnaryOp<const CustomUnaryFunc, const Derived>(derived(), op); - } - template <typename OtherDerived, typename CustomBinaryFunc> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { - return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op); - } - - // Force the evaluation of the expression. 
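A brief sketch combining the morphing operators above with the forced evaluation declared next; eval() materializes a sub-expression so the surrounding expression reads from a plain buffer. Shapes below are arbitrary and only the unsupported Tensor header is assumed:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 3> t(4, 3, 2);
    t.setRandom();

    Eigen::array<Eigen::DenseIndex, 2> new_dims{{6, 4}};
    Eigen::Tensor<float, 2> r = t.reshape(new_dims);       // same 24 coefficients, new shape
    Eigen::Tensor<float, 2> c = t.chip(1, 2);               // slice at offset 1 along dimension 2
    Eigen::Tensor<float, 3> u = (t + t.constant(1.0f)).eval() * t;  // eval() forces the sum first
    return 0;
  }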
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorForcedEvalOp<const Derived> eval() const { - return TensorForcedEvalOp<const Derived>(derived()); - } - - protected: - template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor; - template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize; - template <typename OtherDerived, int AccessLevel> friend class TensorBase; - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } -}; - -template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> -class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> { - public: - typedef internal::traits<Derived> DerivedTraits; - typedef typename DerivedTraits::Scalar Scalar; - typedef typename DerivedTraits::Index Index; - typedef Scalar CoeffReturnType; - static const int NumDimensions = DerivedTraits::NumDimensions; - - template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor; - template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize; - template <typename OtherDerived, int OtherAccessLevel> friend class TensorBase; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setZero() { - return setConstant(Scalar(0)); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) { - return derived() = this->constant(val); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setRandom() { - return derived() = this->random(); - } - template <typename RandomGenerator> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setRandom() { - return derived() = this->template random<RandomGenerator>(); - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& setValues( - const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) { - TensorEvaluator<Derived, DefaultDevice> eval(derived(), DefaultDevice()); - internal::initialize_tensor<Derived, NumDimensions>(eval, vals); - return derived(); - } -#endif // EIGEN_HAS_VARIADIC_TEMPLATES - - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator+=(const OtherDerived& other) { - return derived() = derived() + other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator-=(const OtherDerived& other) { - return derived() = derived() - other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator*=(const OtherDerived& other) { - return derived() = derived() * other.derived(); - } - template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Derived& operator/=(const OtherDerived& other) { - return derived() = derived() / other.derived(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorLayoutSwapOp<const Derived> - swap_layout() const { - return TensorLayoutSwapOp<const Derived>(derived()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorLayoutSwapOp<Derived> - swap_layout() { - return TensorLayoutSwapOp<Derived>(derived()); - } - - template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorConcatenationOp<const Axis, const Derived, const OtherDerived> - concatenate(const OtherDerived& other, const Axis& axis) const { - return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis); - } - 
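A minimal sketch of the writable interface above: in-place initialization, compound assignment, and evaluation on an explicitly chosen device. DefaultDevice is used here for simplicity; only the unsupported Tensor header is assumed:

  #include <unsupported/Eigen/CXX11/Tensor>

  int main() {
    Eigen::Tensor<float, 2> a(2, 2), b(2, 2), c(2, 2);
    a.setConstant(2.0f);           // writable-base helper
    b.setRandom();
    a += b;                        // compound assignment, equivalent to a = a + b
    Eigen::DefaultDevice dev;
    c.device(dev) = a * b;         // evaluate the expression on an explicit device
    return 0;
  }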
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorConcatenationOp<const Axis, Derived, OtherDerived> - concatenate(const OtherDerived& other, const Axis& axis) { - return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis); - } - - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReshapingOp<const NewDimensions, const Derived> - reshape(const NewDimensions& newDimensions) const { - return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions); - } - template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReshapingOp<const NewDimensions, Derived> - reshape(const NewDimensions& newDimensions) { - return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions); - } - - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorSlicingOp<const StartIndices, const Sizes, const Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) const { - return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes); - } - template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorSlicingOp<const StartIndices, const Sizes, Derived> - slice(const StartIndices& startIndices, const Sizes& sizes) { - return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes); - } - - template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, const Derived> - stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { - return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, - const Derived>(derived(), startIndices, stopIndices, strides); - } - template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, Derived> - stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) { - return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, - Derived>(derived(), startIndices, stopIndices, strides); - } - - template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<DimId, const Derived> - chip(const Index offset) const { - return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId); - } - template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp<DimId, Derived> - chip(const Index offset) { - return TensorChippingOp<DimId, Derived>(derived(), offset, DimId); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorChippingOp<Dynamic, const Derived> - chip(const Index offset, const Index dim) const { - return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorChippingOp<Dynamic, Derived> - chip(const Index offset, const Index dim) { - return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim); - } - - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReverseOp<const ReverseDimensions, const Derived> - reverse(const ReverseDimensions& rev) const { - return TensorReverseOp<const ReverseDimensions, const 
Derived>(derived(), rev); - } - template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReverseOp<const ReverseDimensions, Derived> - reverse(const ReverseDimensions& rev) { - return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev); - } - - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); - } - template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorShufflingOp<const Shuffle, Derived> - shuffle(const Shuffle& shuffle) { - return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle); - } - - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorStridingOp<const Strides, const Derived> - stride(const Strides& strides) const { - return TensorStridingOp<const Strides, const Derived>(derived(), strides); - } - template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorStridingOp<const Strides, Derived> - stride(const Strides& strides) { - return TensorStridingOp<const Strides, Derived>(derived(), strides); - } - - // Select the device on which to evaluate the expression. - template <typename DeviceType> - TensorDevice<Derived, DeviceType> device(const DeviceType& device) { - return TensorDevice<Derived, DeviceType>(device, derived()); - } - - protected: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); } -}; -#endif // EIGEN_PARSED_BY_DOXYGEN -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h deleted file mode 100644 index 4cfe300..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ /dev/null @@ -1,392 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H -#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H - -namespace Eigen { - -/** \class TensorBroadcasting - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor broadcasting class. 
- * - * - */ -namespace internal { -template<typename Broadcast, typename XprType> -struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Broadcast, typename XprType> -struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense> -{ - typedef const TensorBroadcastingOp<Broadcast, XprType>& type; -}; - -template<typename Broadcast, typename XprType> -struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type> -{ - typedef TensorBroadcastingOp<Broadcast, XprType> type; -}; - -template <typename Dims> -struct is_input_scalar { - static const bool value = false; -}; -template <> -struct is_input_scalar<Sizes<> > { - static const bool value = true; -}; -#ifndef EIGEN_EMULATE_CXX11_META_H -template <typename std::size_t... Indices> -struct is_input_scalar<Sizes<Indices...> > { - static const bool value = (Sizes<Indices...>::total_size == 1); -}; -#endif - -} // end namespace internal - - - -template<typename Broadcast, typename XprType> -class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) - : m_xpr(expr), m_broadcast(broadcast) {} - - EIGEN_DEVICE_FUNC - const Broadcast& broadcast() const { return m_broadcast; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Broadcast m_broadcast; -}; - - -// Eval as rvalue -template<typename Broadcast, typename ArgType, typename Device> -struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> -{ - typedef TensorBroadcastingOp<Broadcast, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = true, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : 
m_broadcast(op.broadcast()),m_impl(op.expression(), device) - { - // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar - // and store the result in a scalar. Instead one should reshape the scalar into a a N-D - // tensor with N >= 1 of 1 element first and then broadcast. - EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - const InputDimensions& input_dims = m_impl.dimensions(); - const Broadcast& broadcast = op.broadcast(); - for (int i = 0; i < NumDims; ++i) { - eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i] * broadcast[i]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - } else { - m_inputStrides[NumDims-1] = 1; - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims-2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const - { - if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) { - return m_impl.coeff(0); - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return coeffColMajor(index); - } else { - return coeffRowMajor(index); - } - } - - // TODO: attempt to speed this up. 
The integer divisions and modulo are slow - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const - { - Index inputIndex = 0; - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - if (internal::index_statically_eq<Broadcast>(0, 1)) { - eigen_assert(index < m_impl.dimensions()[0]); - inputIndex += index; - } else { - if (internal::index_statically_eq<InputDimensions>(0, 1)) { - eigen_assert(index % m_impl.dimensions()[0] == 0); - } else { - inputIndex += (index % m_impl.dimensions()[0]); - } - } - return m_impl.coeff(inputIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const - { - Index inputIndex = 0; - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) { - eigen_assert(index < m_impl.dimensions()[NumDims-1]); - inputIndex += index; - } else { - if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) { - eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); - } else { - inputIndex += (index % m_impl.dimensions()[NumDims-1]); - } - } - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const - { - if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) { - return internal::pset1<PacketReturnType>(m_impl.coeff(0)); - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return packetColMajor<LoadMode>(index); - } else { - return packetRowMajor<LoadMode>(index); - } - } - - // Ignore the LoadMode and always use unaligned loads since we can't guarantee - // the alignment at compile time. 
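As an aside, the coefficient lookup implemented by coeffColMajor()/coeffRowMajor() above boils down to one division, one modulo and one multiply per dimension. A standalone sketch of the column-major case (an illustration written for this note, omitting the shortcuts taken above for statically-known broadcast factors):

    #include <array>
    #include <cstddef>

    // Map a column-major linear index of the broadcast output back to a linear index of the
    // input: each coordinate is reduced modulo the input extent along its axis, so every
    // replicated copy reads the same input element.
    template <std::size_t N>
    std::size_t broadcastInputIndex(std::size_t index,
                                    const std::array<std::size_t, N>& inDims,
                                    const std::array<std::size_t, N>& inStrides,
                                    const std::array<std::size_t, N>& outStrides) {
      std::size_t inputIndex = 0;
      for (std::size_t i = N - 1; i > 0; --i) {
        const std::size_t idx = index / outStrides[i];    // coordinate along axis i
        inputIndex += (idx % inDims[i]) * inStrides[i];   // wrap into the input extent
        index -= idx * outStrides[i];
      }
      return inputIndex + index % inDims[0];              // innermost axis has stride 1
    }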
- template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - const Index originalIndex = index; - - Index inputIndex = 0; - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - Index innermostLoc; - if (internal::index_statically_eq<Broadcast>(0, 1)) { - eigen_assert(index < m_impl.dimensions()[0]); - innermostLoc = index; - } else { - if (internal::index_statically_eq<InputDimensions>(0, 1)) { - eigen_assert(index % m_impl.dimensions()[0] == 0); - innermostLoc = 0; - } else { - innermostLoc = index % m_impl.dimensions()[0]; - } - } - inputIndex += innermostLoc; - - // Todo: this could be extended to the second dimension if we're not - // broadcasting alongside the first dimension, and so on. - if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) { - return m_impl.template packet<Unaligned>(inputIndex); - } else { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - values[0] = m_impl.coeff(inputIndex); - for (int i = 1; i < PacketSize; ++i) { - values[i] = coeffColMajor(originalIndex+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - const Index originalIndex = index; - - Index inputIndex = 0; - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (internal::index_statically_eq<Broadcast>(i, 1)) { - eigen_assert(idx < m_impl.dimensions()[i]); - inputIndex += idx * m_inputStrides[i]; - } else { - if (internal::index_statically_eq<InputDimensions>(i, 1)) { - eigen_assert(idx % m_impl.dimensions()[i] == 0); - } else { - inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; - } - } - index -= idx * m_outputStrides[i]; - } - Index innermostLoc; - if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) { - eigen_assert(index < m_impl.dimensions()[NumDims-1]); - innermostLoc = index; - } else { - if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) { - eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); - innermostLoc = 0; - } else { - innermostLoc = index % m_impl.dimensions()[NumDims-1]; - } - } - inputIndex += innermostLoc; - - // Todo: this could be extended to the second dimension if we're not - // broadcasting alongside the first dimension, and so on. 
- if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) { - return m_impl.template packet<Unaligned>(inputIndex); - } else { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - values[0] = m_impl.coeff(inputIndex); - for (int i = 1; i < PacketSize; ++i) { - values[i] = coeffRowMajor(originalIndex+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - double compute_cost = TensorOpCost::AddCost<Index>(); - if (NumDims > 0) { - for (int i = NumDims - 1; i > 0; --i) { - compute_cost += TensorOpCost::DivCost<Index>(); - if (internal::index_statically_eq<Broadcast>(i, 1)) { - compute_cost += - TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); - } else { - if (!internal::index_statically_eq<InputDimensions>(i, 1)) { - compute_cost += TensorOpCost::MulCost<Index>() + - TensorOpCost::ModCost<Index>() + - TensorOpCost::AddCost<Index>(); - } - } - compute_cost += - TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); - } - } - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - Broadcast functor() const { return m_broadcast; } - - protected: - const Broadcast m_broadcast; - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h deleted file mode 100644 index 1ba7ef1..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ /dev/null @@ -1,384 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H -#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H - -namespace Eigen { - -/** \class TensorKChippingReshaping - * \ingroup CXX11_Tensor_Module - * - * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. 
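 *
 * For instance (an illustrative sketch, not taken from this file), chipping a rank-3
 * tensor along dimension 0 yields a rank-2 slice:
 * \code
 * // #include <unsupported/Eigen/CXX11/Tensor>
 * Eigen::Tensor<float, 3> t(2, 3, 4);
 * t.setRandom();
 * Eigen::Tensor<float, 2> slice = t.chip(1, 0);  // the 3x4 slice at index 1 of dimension 0
 * \endcode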
- * - * - */ - -namespace internal { -template<DenseIndex DimId, typename XprType> -struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions - 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex DimId, typename XprType> -struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense> -{ - typedef const TensorChippingOp<DimId, XprType>& type; -}; - -template<DenseIndex DimId, typename XprType> -struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type> -{ - typedef TensorChippingOp<DimId, XprType> type; -}; - -template <DenseIndex DimId> -struct DimensionId -{ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { - eigen_assert(dim == DimId); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { - return DimId; - } -}; -template <> -struct DimensionId<Dynamic> -{ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { - eigen_assert(dim >= 0); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { - return actual_dim; - } - private: - const DenseIndex actual_dim; -}; - - -} // end namespace internal - - - -template<DenseIndex DimId, typename XprType> -class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) - : m_xpr(expr), m_offset(offset), m_dim(dim) { - } - - EIGEN_DEVICE_FUNC - const Index offset() const { return m_offset; } - EIGEN_DEVICE_FUNC - const Index dim() const { return m_dim.actualDim(); } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other) - { - typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const Index m_offset; - const internal::DimensionId<DimId> m_dim; -}; - - -// Eval as rvalue -template<DenseIndex DimId, typename ArgType, typename Device> -struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> -{ - typedef TensorChippingOp<DimId, 
ArgType> XprType; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims-1; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - - enum { - // Alignment can't be guaranteed at compile time since it depends on the - // slice offsets. - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) - { - EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(NumInputDims > m_dim.actualDim()); - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); - - int j = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (i != m_dim.actualDim()) { - m_dimensions[j] = input_dims[i]; - ++j; - } - } - - m_stride = 1; - m_inputStride = 1; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < m_dim.actualDim(); ++i) { - m_stride *= input_dims[i]; - m_inputStride *= input_dims[i]; - } - } else { - for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { - m_stride *= input_dims[i]; - m_inputStride *= input_dims[i]; - } - } - m_inputStride *= input_dims[m_dim.actualDim()]; - m_inputOffset = m_stride * op.offset(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(m_stride == 1); - Index inputIndex = index * m_inputStride + m_inputOffset; - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = m_impl.coeff(inputIndex); - inputIndex += m_inputStride; - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. 
- eigen_assert(m_stride > index); - return m_impl.template packet<LoadMode>(index + m_inputOffset); - } else { - const Index idx = index / m_stride; - const Index rem = index - idx * m_stride; - if (rem + PacketSize <= m_stride) { - Index inputIndex = idx * m_inputStride + m_inputOffset + rem; - return m_impl.template packet<LoadMode>(inputIndex); - } else { - // Cross the stride boundary. Fallback to slow path. - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index); - ++index; - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - double cost = 0; - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == NumInputDims - 1)) { - cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>(); - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && - m_dim.actualDim() == NumInputDims - 1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && - m_dim.actualDim() == 0)) { - cost += TensorOpCost::AddCost<Index>(); - } else { - cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() + - 3 * TensorOpCost::AddCost<Index>(); - } - - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { - CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data()); - if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) && - result) { - return result + m_inputOffset; - } else { - return NULL; - } - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex; - if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(m_stride == 1); - inputIndex = index * m_inputStride + m_inputOffset; - } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) || - (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. 
- eigen_assert(m_stride > index); - inputIndex = index + m_inputOffset; - } else { - const Index idx = index / m_stride; - inputIndex = idx * m_inputStride + m_inputOffset; - index -= idx * m_stride; - inputIndex += index; - } - return inputIndex; - } - - Dimensions m_dimensions; - Index m_stride; - Index m_inputOffset; - Index m_inputStride; - TensorEvaluator<ArgType, Device> m_impl; - const internal::DimensionId<DimId> m_dim; - const Device& m_device; -}; - - -// Eval as lvalue -template<DenseIndex DimId, typename ArgType, typename Device> -struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device> - : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base; - typedef TensorChippingOp<DimId, ArgType> XprType; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims-1; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - - if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { - // m_stride is equal to 1, so let's avoid the integer division. - eigen_assert(this->m_stride == 1); - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - Index inputIndex = index * this->m_inputStride + this->m_inputOffset; - for (int i = 0; i < PacketSize; ++i) { - this->m_impl.coeffRef(inputIndex) = values[i]; - inputIndex += this->m_inputStride; - } - } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) || - (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) { - // m_stride is aways greater than index, so let's avoid the integer division. - eigen_assert(this->m_stride > index); - this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x); - } else { - const Index idx = index / this->m_stride; - const Index rem = index - idx * this->m_stride; - if (rem + PacketSize <= this->m_stride) { - const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; - this->m_impl.template writePacket<StoreMode>(inputIndex, x); - } else { - // Cross stride boundary. Fallback to slow path. 
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < PacketSize; ++i) { - this->coeffRef(index) = values[i]; - ++index; - } - } - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h deleted file mode 100644 index 59bf90d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ /dev/null @@ -1,361 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H - -namespace Eigen { - -/** \class TensorConcatenationOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor concatenation class. - * - * - */ -namespace internal { -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename promote_storage_type<typename LhsXprType::Scalar, - typename RhsXprType::Scalar>::ret Scalar; - typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<LhsXprType>::NumDimensions; - static const int Layout = traits<LhsXprType>::Layout; - enum { Flags = 0 }; -}; - -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type; -}; - -template<typename Axis, typename LhsXprType, typename RhsXprType> -struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type> -{ - typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - -template<typename Axis, typename LhsXprType, typename RhsXprType> -class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors> -{ - public: - typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar; - typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorConcatenationOp>::Index Index; - typedef typename internal::nested<TensorConcatenationOp>::type Nested; - typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename NumTraits<Scalar>::Real RealScalar; - - 
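By way of illustration, this op is normally created through TensorBase::concatenate; a minimal usage sketch (not from this file, assuming two rank-2 float tensors):

    // #include <unsupported/Eigen/CXX11/Tensor>
    Eigen::Tensor<float, 2> a(2, 3), b(2, 4);
    a.setConstant(1.0f);
    b.setConstant(2.0f);
    // Every dimension except the concatenation axis must match; the result is 2x7.
    Eigen::Tensor<float, 2> c = a.concatenate(b, 1);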
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other) - { - typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const Axis m_axis; -}; - - -// Eval as rvalue -template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> -{ - typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; - static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - eigen_assert(0 <= m_axis && m_axis < NumDims); - const Dimensions& lhs_dims = m_leftImpl.dimensions(); - const Dimensions& rhs_dims = m_rightImpl.dimensions(); - { - int i = 0; - for (; i < m_axis; ++i) { - eigen_assert(lhs_dims[i] > 0); - eigen_assert(lhs_dims[i] == rhs_dims[i]); - m_dimensions[i] = lhs_dims[i]; - } - eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. 
- eigen_assert(rhs_dims[i] > 0); - m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; - for (++i; i < NumDims; ++i) { - eigen_assert(lhs_dims[i] > 0); - eigen_assert(lhs_dims[i] == rhs_dims[i]); - m_dimensions[i] = lhs_dims[i]; - } - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_leftStrides[0] = 1; - m_rightStrides[0] = 1; - m_outputStrides[0] = 1; - - for (int j = 1; j < NumDims; ++j) { - m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1]; - m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1]; - m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1]; - } - } else { - m_leftStrides[NumDims - 1] = 1; - m_rightStrides[NumDims - 1] = 1; - m_outputStrides[NumDims - 1] = 1; - - for (int j = NumDims - 2; j >= 0; --j) { - m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1]; - m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1]; - m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) - { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() - { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. - // See CL/76180724 comments for more ideas. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Collect dimension-wise indices (subs). - array<Index, NumDims> subs; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - subs[i] = index / m_outputStrides[i]; - index -= subs[i] * m_outputStrides[i]; - } - subs[0] = index; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - subs[i] = index / m_outputStrides[i]; - index -= subs[i] * m_outputStrides[i]; - } - subs[NumDims - 1] = index; - } - - const Dimensions& left_dims = m_leftImpl.dimensions(); - if (subs[m_axis] < left_dims[m_axis]) { - Index left_index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - left_index = subs[0]; - for (int i = 1; i < NumDims; ++i) { - left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; - } - } else { - left_index = subs[NumDims - 1]; - for (int i = NumDims - 2; i >= 0; --i) { - left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; - } - } - return m_leftImpl.coeff(left_index); - } else { - subs[m_axis] -= left_dims[m_axis]; - const Dimensions& right_dims = m_rightImpl.dimensions(); - Index right_index; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - right_index = subs[0]; - for (int i = 1; i < NumDims; ++i) { - right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; - } - } else { - right_index = subs[NumDims - 1]; - for (int i = NumDims - 2; i >= 0; --i) { - right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; - } - } - return m_rightImpl.coeff(right_index); - } - } - - // TODO(phli): Add a real vectorization. 
- template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); - - EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + - 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>() + - TensorOpCost::ModCost<Index>()); - const double lhs_size = m_leftImpl.dimensions().TotalSize(); - const double rhs_size = m_rightImpl.dimensions().TotalSize(); - return (lhs_size / (lhs_size + rhs_size)) * - m_leftImpl.costPerCoeff(vectorized) + - (rhs_size / (lhs_size + rhs_size)) * - m_rightImpl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_leftStrides; - array<Index, NumDims> m_rightStrides; - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; - const Axis m_axis; -}; - -// Eval as lvalue -template<typename Axis, typename LeftArgType, typename RightArgType, typename Device> - struct TensorEvaluator<TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> - : public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> -{ - typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base; - typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType; - typedef typename Base::Dimensions Dimensions; - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) - : Base(op, device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - // Collect dimension-wise indices (subs). 
- array<Index, Base::NumDims> subs; - for (int i = Base::NumDims - 1; i > 0; --i) { - subs[i] = index / this->m_outputStrides[i]; - index -= subs[i] * this->m_outputStrides[i]; - } - subs[0] = index; - - const Dimensions& left_dims = this->m_leftImpl.dimensions(); - if (subs[this->m_axis] < left_dims[this->m_axis]) { - Index left_index = subs[0]; - for (int i = 1; i < Base::NumDims; ++i) { - left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; - } - return this->m_leftImpl.coeffRef(left_index); - } else { - subs[this->m_axis] -= left_dims[this->m_axis]; - const Dimensions& right_dims = this->m_rightImpl.dimensions(); - Index right_index = subs[0]; - for (int i = 1; i < Base::NumDims; ++i) { - right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; - } - return this->m_rightImpl.coeffRef(right_index); - } - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); - - EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < packetSize; ++i) { - coeffRef(index+i) = values[i]; - } - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h deleted file mode 100644 index 20b29e5..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ /dev/null @@ -1,628 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H - -namespace Eigen { - -/** \class TensorContraction - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor contraction class. - * - * - */ -namespace internal { - -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type, - typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar; - - typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - - // From NumDims below. 
- static const int NumDimensions = traits<RhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value; - static const int Layout = traits<LhsXprType>::Layout; - - enum { - Flags = 0 - }; -}; - -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType>& type; -}; - -template<typename Dimensions, typename LhsXprType, typename RhsXprType> -struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >::type> -{ - typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType> type; -}; - -template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename Device_> -struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_>, Device_> > { - typedef Indices_ Indices; - typedef LeftArgType_ LeftArgType; - typedef RightArgType_ RightArgType; - typedef Device_ Device; - - // From NumDims below. - static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value; -}; - -} // end namespace internal - -template<typename Indices, typename LhsXprType, typename RhsXprType> -class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar; - typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( - const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {} - - EIGEN_DEVICE_FUNC - const Indices& indices() const { return m_indices; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const Indices m_indices; -}; - - -template<typename Derived> -struct TensorContractionEvaluatorBase -{ - typedef typename internal::traits<Derived>::Indices Indices; - typedef typename internal::traits<Derived>::LeftArgType LeftArgType; - typedef typename internal::traits<Derived>::RightArgType RightArgType; - typedef typename internal::traits<Derived>::Device Device; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - enum { - IsAligned = true, - PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), - Layout = TensorEvaluator<LeftArgType, 
Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = true - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - typedef DSizes<Index, NumDims> Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorContractionEvaluatorBase(const XprType& op, const Device& device) - : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), - op.lhsExpression(), op.rhsExpression()), device), - m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), - op.rhsExpression(), op.lhsExpression()), device), - m_device(device), - m_result(NULL) { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == - static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), - YOU_MADE_A_PROGRAMMING_MISTAKE); - - - DSizes<Index, LDims> eval_left_dims; - DSizes<Index, RDims> eval_right_dims; - array<IndexPair<Index>, ContractDims> eval_op_indices; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // For ColMajor, we keep using the existing dimensions - for (int i = 0; i < LDims; i++) { - eval_left_dims[i] = m_leftImpl.dimensions()[i]; - } - for (int i = 0; i < RDims; i++) { - eval_right_dims[i] = m_rightImpl.dimensions()[i]; - } - // We keep the pairs of contracting indices. - for (int i = 0; i < ContractDims; i++) { - eval_op_indices[i].first = op.indices()[i].first; - eval_op_indices[i].second = op.indices()[i].second; - } - } else { - // For RowMajor, we need to reverse the existing dimensions - for (int i = 0; i < LDims; i++) { - eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; - } - for (int i = 0; i < RDims; i++) { - eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; - } - // We need to flip all the pairs of contracting indices as well as - // reversing the dimensions. - for (int i = 0; i < ContractDims; i++) { - eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; - eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; - } - } - - // Check for duplicate axes and make sure the first index in eval_op_indices - // is increasing. 
Using O(n^2) sorting is OK since ContractDims is small - for (int i = 0; i < ContractDims; i++) { - for (int j = i + 1; j < ContractDims; j++) { - eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && - eval_op_indices[j].second != eval_op_indices[i].second && - "contraction axes should be unique"); - if (eval_op_indices[j].first < eval_op_indices[i].first) { - numext::swap(eval_op_indices[j], eval_op_indices[i]); - } - } - } - - array<Index, LDims> lhs_strides; - lhs_strides[0] = 1; - for (int i = 0; i < LDims-1; ++i) { - lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; - } - - array<Index, RDims> rhs_strides; - rhs_strides[0] = 1; - for (int i = 0; i < RDims-1; ++i) { - rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; - } - - if (m_i_strides.size() > 0) m_i_strides[0] = 1; - if (m_j_strides.size() > 0) m_j_strides[0] = 1; - if (m_k_strides.size() > 0) m_k_strides[0] = 1; - - m_i_size = 1; - m_j_size = 1; - m_k_size = 1; - - // To compute the dimension, we simply concatenate the non-contracting - // dimensions of the left and then the right tensor. Additionally, we also - // compute the strides corresponding to the left non-contracting - // dimensions and right non-contracting dimensions. - m_lhs_inner_dim_contiguous = true; - int dim_idx = 0; - unsigned int nocontract_idx = 0; - - for (int i = 0; i < LDims; i++) { - // find if we are contracting on index i of left tensor - bool contracting = false; - for (int j = 0; j < ContractDims; j++) { - if (eval_op_indices[j].first == i) { - contracting = true; - break; - } - } - if (!contracting) { - // add dimension size to output dimensions - m_dimensions[dim_idx] = eval_left_dims[i]; - m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; - if (dim_idx != i) { - m_lhs_inner_dim_contiguous = false; - } - if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) { - m_i_strides[nocontract_idx+1] = - m_i_strides[nocontract_idx] * eval_left_dims[i]; - } else { - m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; - } - dim_idx++; - nocontract_idx++; - } - } - - nocontract_idx = 0; - for (int i = 0; i < RDims; i++) { - bool contracting = false; - // find if we are contracting on index i of right tensor - for (int j = 0; j < ContractDims; j++) { - if (eval_op_indices[j].second == i) { - contracting = true; - break; - } - } - if (!contracting) { - m_dimensions[dim_idx] = eval_right_dims[i]; - if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) { - m_j_strides[nocontract_idx+1] = - m_j_strides[nocontract_idx] * eval_right_dims[i]; - } else { - m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; - } - m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; - dim_idx++; - nocontract_idx++; - } - } - - // Now compute the strides corresponding to the contracting dimensions. We - // assumed above that non-contracting axes are represented in the same order - // in the matrix as they are in the tensor. This is not the case for - // contracting axes. As the contracting axes must be of the same size in - // each tensor, we'll only look at the first tensor here. 
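To make the m/k/n bookkeeping computed here concrete, consider an illustrative usage sketch (not from this file): contracting the last dimension of a 2x3x4 tensor with the first dimension of a 4x5 tensor yields a 2x3x5 result, evaluated internally as a (2*3)-by-4 times 4-by-5 matrix product, i.e. m = 6, k = 4, n = 5.

    // #include <unsupported/Eigen/CXX11/Tensor>
    Eigen::Tensor<float, 3> a(2, 3, 4);
    Eigen::Tensor<float, 2> b(4, 5);
    a.setRandom();
    b.setRandom();
    Eigen::array<Eigen::IndexPair<int>, 1> dims = {{ Eigen::IndexPair<int>(2, 0) }};
    Eigen::Tensor<float, 3> c = a.contract(b, dims);  // c has dimensions 2x3x5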
- m_rhs_inner_dim_contiguous = true; - m_rhs_inner_dim_reordered = false; - for (int i = 0; i < ContractDims; i++) { - Index left = eval_op_indices[i].first; - Index right = eval_op_indices[i].second; - - Index size = eval_left_dims[left]; - eigen_assert(size == eval_right_dims[right] && - "Contraction axes must be same size"); - - if (i+1 < static_cast<int>(internal::array_size<contract_t>::value)) { - m_k_strides[i+1] = m_k_strides[i] * size; - } else { - m_k_size = m_k_strides[i] * size; - } - m_left_contracting_strides[i] = lhs_strides[left]; - m_right_contracting_strides[i] = rhs_strides[right]; - - if (i > 0 && right < eval_op_indices[i-1].second) { - m_rhs_inner_dim_reordered = true; - } - if (right != i) { - m_rhs_inner_dim_contiguous = false; - } - } - - // If the layout is RowMajor, we need to reverse the m_dimensions - if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) { - for (int i = 0, j = NumDims - 1; i < j; i++, j--) { - numext::swap(m_dimensions[i], m_dimensions[j]); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { - if (this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer); - } - } - } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer); - } - else { - static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer); - } - } - } - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { - const Index rows = m_i_size; - const Index cols = m_k_size; - - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; - const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; - const int lhs_alignment = 
LeftEvaluator::IsAligned ? Aligned : Unaligned; - const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned; - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, lhs_alignment> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, rhs_alignment> RhsMapper; - - LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, - m_left_contracting_strides, m_k_strides); - RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides, - m_right_contracting_strides, m_k_strides); - - const Scalar alpha(1); - const Index resIncr(1); - - // zero out the result buffer (which must be of size at least rows * sizeof(Scalar) - m_device.memset(buffer, 0, rows * sizeof(Scalar)); - - internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run( - rows, cols, lhs, rhs, - buffer, resIncr, alpha); - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - // define mr, nr, and all of my data mapper types - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits; - - const Index nr = Traits::nr; - const Index mr = Traits::mr; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; - const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - // Declare GEBP packing and kernel structs - internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs; - internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs; - - internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp; - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, 
this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - // Sizes of the blocks to load in cache. See the Goto paper for details. - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1); - const Index kc = blocking.kc(); - const Index mc = numext::mini(m, blocking.mc()); - const Index nc = numext::mini(n, blocking.nc()); - const Index sizeA = mc * kc; - const Index sizeB = kc * nc; - - LhsScalar* blockA = static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))); - RhsScalar* blockB = static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))); - - for(Index i2=0; i2<m; i2+=mc) - { - const Index actual_mc = numext::mini(i2+mc,m)-i2; - for (Index k2 = 0; k2 < k; k2 += kc) { - // make sure we don't overshoot right edge of left matrix, then pack vertical panel - const Index actual_kc = numext::mini(k2 + kc, k) - k2; - pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc, 0, 0); - - // series of horizontal blocks - for (Index j2 = 0; j2 < n; j2 += nc) { - // make sure we don't overshoot right edge of right matrix, then pack block - const Index actual_nc = numext::mini(j2 + nc, n) - j2; - pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc, 0, 0); - - // call gebp (matrix kernel) - // The parameters here are copied from Eigen's GEMM implementation - gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); - } - } - } - - this->m_device.deallocate(blockA); - this->m_device.deallocate(blockB); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - - if (m_result != NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } - - protected: - // Prevent assignment - TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); - Dimensions m_dimensions; - - contract_t m_k_strides; - contract_t m_left_contracting_strides; - contract_t m_right_contracting_strides; - - bool m_lhs_inner_dim_contiguous; - bool m_rhs_inner_dim_contiguous; - bool m_rhs_inner_dim_reordered; - - left_nocontract_t m_i_strides; - right_nocontract_t m_j_strides; - left_nocontract_t m_left_nocontract_strides; - right_nocontract_t m_right_nocontract_strides; - - Index m_i_size; - Index m_j_size; - Index m_k_size; - - TensorEvaluator<EvalLeftArgType, Device> m_leftImpl; - TensorEvaluator<EvalRightArgType, Device> m_rightImpl; - const Device& m_device; - Scalar* m_result; -}; - - -// evaluator for default device -template<typename Indices, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> : - public TensorContractionEvaluatorBase< - TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> > { - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, 
RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - // Could we use NumDimensions here? - typedef DSizes<Index, NumDims> Dimensions; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) { } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { - if (this->m_j_size == 1) { - this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - return; - } - - this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h deleted file mode 100644 index 5cf7b4f..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ /dev/null @@ -1,56 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
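// Note added for illustration (not part of the original file): the TensorContractionBlocking
// class below simply forwards to computeProductBlockingSizes() to pick cache-friendly panel
// sizes. The evalGemm() loop above consumes them roughly as:
//
//   for (i2 = 0; i2 < m; i2 += mc)        // mc rows of the LHS at a time
//     for (k2 = 0; k2 < k; k2 += kc)      // kc-deep slice of the contraction dimension
//       pack the mc x kc LHS panel into blockA;
//       for (j2 = 0; j2 < n; j2 += nc)    // nc columns of the RHS at a time
//         pack the kc x nc RHS panel into blockB; run gebp on the two packed panels;
//
// so kc bounds the packed panel depth while mc and nc bound the packed panel widths.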
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H - - -namespace Eigen { -namespace internal { - -enum { - ShardByRow = 0, - ShardByCol = 1 -}; - - -// Default Blocking Strategy -template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol> -class TensorContractionBlocking { - public: - - typedef typename LhsMapper::Scalar LhsScalar; - typedef typename RhsMapper::Scalar RhsScalar; - - EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : - kc_(k), mc_(m), nc_(n) - { - if (ShardingType == ShardByCol) { - computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads); - } - else { - computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads); - } - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } - - private: - Index kc_; - Index mc_; - Index nc_; -}; - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h deleted file mode 100644 index d65dbb4..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ /dev/null @@ -1,1391 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// Copyright (C) 2015 Navdeep Jaitly <ndjaitly@google.com> -// Copyright (C) 2014 Eric Martin <eric@ericmart.in> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - -namespace Eigen { - -template<typename Scalar, typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool needs_edge_check> -__device__ EIGEN_STRONG_INLINE void -EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem, - const Index m_size, const Index n_size, const Index k_size) { - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - // declare and initialize 64 registers for output 8x8 block - - // prefetch registers - Scalar lhs_pf0; - Scalar lhs_pf1; - Scalar lhs_pf2; - Scalar lhs_pf3; - Scalar lhs_pf4; - Scalar lhs_pf5; - Scalar lhs_pf6; - Scalar lhs_pf7; - - Scalar rhs_pf0; - Scalar rhs_pf1; - Scalar rhs_pf2; - Scalar rhs_pf3; - Scalar rhs_pf4; - Scalar rhs_pf5; - Scalar rhs_pf6; - Scalar rhs_pf7; - - // shared memory is formatted - // (contract idx in block, nocontract idx in block, block idx) - // where block idx is column major. This transposition limits the number of - // bank conflicts when reading the LHS. The core idea is that since the contracting - // index is shared by both sides, then the contracting index should be in threadIdx.x. 
- - // On the LHS, we pad each row inside of each block with an extra element. This makes - // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts - // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks. - - // On the RHS we just add 8 padding elements to the end of each block. This gives no bank - // conflicts on writes and also none on reads. - - // storage indices - const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; - const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; - - const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0; - const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1; - const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2; - const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3; - const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4; - const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5; - const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6; - const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7; - - const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0; - const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1; - const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2; - const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3; - const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4; - const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5; - const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6; - const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7; - - // in the loading code, the following variables are important: - // threadIdx.x: the vertical position in an 8x8 block - // threadIdx.y: the vertical index of the 8x8 block in the grid - // threadIdx.z: the horizontal position in an 8x8 block - // k: the horizontal index of the 8x8 block in the grid - // - // The k parameter is implicit (it was the loop counter for a loop that went - // from 0 to <8, but now that loop is unrolled in the below code. 
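// Worked example added for clarity (not in the original source): with the 8 x 9 padding
// described above, the LHS store index for thread (x, y, z) in k-slice i is
//   lhs_store_idx = y * 72 + x * 9 + z + i * 576.
// Assuming a 4-byte Scalar and 32 shared-memory banks, the eight threads that differ only
// in x write to offsets 0, 9, 18, ..., 63, which are all distinct modulo 32 (and adding
// y * 72, i.e. y * 8 mod 32, keeps a full warp's 32 offsets distinct as well), which is
// what makes the writes bank-conflict free.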
- - const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; - const Index lhs_vert = base_m + load_idx_vert; - -#define prefetchIntoRegisters(base_k) \ - { \ - lhs_pf0 = conv(0); \ - lhs_pf1 = conv(0); \ - lhs_pf2 = conv(0); \ - lhs_pf3 = conv(0); \ - lhs_pf4 = conv(0); \ - lhs_pf5 = conv(0); \ - lhs_pf6 = conv(0); \ - lhs_pf7 = conv(0); \ - \ - rhs_pf0 = conv(0); \ - rhs_pf1 = conv(0); \ - rhs_pf2 = conv(0); \ - rhs_pf3 = conv(0); \ - rhs_pf4 = conv(0); \ - rhs_pf5 = conv(0); \ - rhs_pf6 = conv(0); \ - rhs_pf7 = conv(0); \ - \ - if (!needs_edge_check || lhs_vert < m_size) { \ - const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ - const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ - const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ - const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ - const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ - const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ - const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ - const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \ - \ - if (!needs_edge_check || lhs_horiz_7 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ - lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \ - } else if (lhs_horiz_6 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ - } else if (lhs_horiz_5 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ - } else if (lhs_horiz_4 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ - } else if (lhs_horiz_3 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ - } else if (lhs_horiz_2 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ - } else if (lhs_horiz_1 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ - } else if (lhs_horiz_0 < k_size) { \ - lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ - } \ - } \ - \ - const Index rhs_vert = base_k + load_idx_vert; \ - if (!needs_edge_check || rhs_vert < k_size) { \ - const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ - const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ - const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ - const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ - const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ - const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ - const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ - const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ - \ - if (rhs_horiz_7 < n_size) { \ - rhs_pf0 = 
rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ - rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \ - } else if (rhs_horiz_6 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ - } else if (rhs_horiz_5 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ - } else if (rhs_horiz_4 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ - } else if (rhs_horiz_3 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ - } else if (rhs_horiz_2 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ - } else if (rhs_horiz_1 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ - } else if (rhs_horiz_0 < n_size) { \ - rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ - } \ - } \ - } \ - -#define writeRegToShmem(_) \ - lhs_shmem[lhs_store_idx_0] = lhs_pf0; \ - rhs_shmem[rhs_store_idx_0] = rhs_pf0; \ - \ - lhs_shmem[lhs_store_idx_1] = lhs_pf1; \ - rhs_shmem[rhs_store_idx_1] = rhs_pf1; \ - \ - lhs_shmem[lhs_store_idx_2] = lhs_pf2; \ - rhs_shmem[rhs_store_idx_2] = rhs_pf2; \ - \ - lhs_shmem[lhs_store_idx_3] = lhs_pf3; \ - rhs_shmem[rhs_store_idx_3] = rhs_pf3; \ - \ - lhs_shmem[lhs_store_idx_4] = lhs_pf4; \ - rhs_shmem[rhs_store_idx_4] = rhs_pf4; \ - \ - lhs_shmem[lhs_store_idx_5] = lhs_pf5; \ - rhs_shmem[rhs_store_idx_5] = rhs_pf5; \ - \ - lhs_shmem[lhs_store_idx_6] = lhs_pf6; \ - rhs_shmem[rhs_store_idx_6] = rhs_pf6; \ - \ - lhs_shmem[lhs_store_idx_7] = lhs_pf7; \ - rhs_shmem[rhs_store_idx_7] = rhs_pf7; \ - - // declare and initialize result array -#define res(i, j) _res_##i##j -#define initResultRow(i) \ - Scalar res(i, 0) = conv(0); \ - Scalar res(i, 1) = conv(0); \ - Scalar res(i, 2) = conv(0); \ - Scalar res(i, 3) = conv(0); \ - Scalar res(i, 4) = conv(0); \ - Scalar res(i, 5) = conv(0); \ - Scalar res(i, 6) = conv(0); \ - Scalar res(i, 7) = conv(0); \ - - internal::scalar_cast_op<int, Scalar> conv; - initResultRow(0); - initResultRow(1); - initResultRow(2); - initResultRow(3); - initResultRow(4); - initResultRow(5); - initResultRow(6); - initResultRow(7); -#undef initResultRow - - for (Index base_k = 0; base_k < k_size; base_k += 64) { - // wait for previous iteration to finish with shmem. 
Despite common sense, - // the code is a bit faster with this here then at bottom of loop - __syncthreads(); - - prefetchIntoRegisters(base_k); - writeRegToShmem(); - - #undef prefetchIntoRegisters - #undef writeRegToShmem - - // wait for shared mem packing to be done before starting computation - __syncthreads(); - - // compute 8x8 matrix product by outer product. This involves packing one column - // of LHS and one row of RHS into registers (takes 16 registers). - -#define lcol(i) _lcol##i - Scalar lcol(0); - Scalar lcol(1); - Scalar lcol(2); - Scalar lcol(3); - Scalar lcol(4); - Scalar lcol(5); - Scalar lcol(6); - Scalar lcol(7); - -#define rrow(j) _rrow##j - Scalar rrow(0); - Scalar rrow(1); - Scalar rrow(2); - Scalar rrow(3); - Scalar rrow(4); - Scalar rrow(5); - Scalar rrow(6); - Scalar rrow(7); - - // Now x corresponds to k, y to m, and z to n - const Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y]; - const Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z]; - -#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))] -#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))] - -#define loadData(i, j) \ - lcol(0) = lhs_element(0, j); \ - rrow(0) = rhs_element(i, 0); \ - lcol(1) = lhs_element(1, j); \ - rrow(1) = rhs_element(i, 1); \ - lcol(2) = lhs_element(2, j); \ - rrow(2) = rhs_element(i, 2); \ - lcol(3) = lhs_element(3, j); \ - rrow(3) = rhs_element(i, 3); \ - lcol(4) = lhs_element(4, j); \ - rrow(4) = rhs_element(i, 4); \ - lcol(5) = lhs_element(5, j); \ - rrow(5) = rhs_element(i, 5); \ - lcol(6) = lhs_element(6, j); \ - rrow(6) = rhs_element(i, 6); \ - lcol(7) = lhs_element(7, j); \ - rrow(7) = rhs_element(i, 7); \ - -#define computeCol(j) \ - res(0, j) += lcol(0) * rrow(j); \ - res(1, j) += lcol(1) * rrow(j); \ - res(2, j) += lcol(2) * rrow(j); \ - res(3, j) += lcol(3) * rrow(j); \ - res(4, j) += lcol(4) * rrow(j); \ - res(5, j) += lcol(5) * rrow(j); \ - res(6, j) += lcol(6) * rrow(j); \ - res(7, j) += lcol(7) * rrow(j); \ - -#define computePass(i) \ - loadData(i, i); \ - \ - computeCol(0); \ - computeCol(1); \ - computeCol(2); \ - computeCol(3); \ - computeCol(4); \ - computeCol(5); \ - computeCol(6); \ - computeCol(7); \ - - computePass(0); - computePass(1); - computePass(2); - computePass(3); - computePass(4); - computePass(5); - computePass(6); - computePass(7); - -#undef lcol -#undef rrow -#undef lhs_element -#undef rhs_element -#undef loadData -#undef computeCol -#undef computePass - } // end loop over k - - // we've now iterated over all of the large (ie width 64) k blocks and - // accumulated results in registers. At this point thread (x, y, z) contains - // the sum across all big k blocks of the product of little k block of index (x, y) - // with block of index (y, z). To compute the final output, we need to reduce - // the 8 threads over y by summation. 
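// Sketch added for clarity (not in the original source): the reduceMatrix(1/2/4) calls
// below implement a butterfly sum over aligned groups of 8 lanes. With the pre-CUDA-9
// __shfl_xor intrinsic used throughout this file, each step adds the value held by the
// lane whose ID differs in exactly one bit, so for a per-lane value v the rolled-up
// equivalent is:
//
//   for (int mask = 1; mask < 8; mask <<= 1)   // masks 1, 2, 4
//     v += __shfl_xor(v, mask);
//
// after which every lane in its group of 8 holds the same total, which is why the
// comment below can say that each thread of index (_, y, z) holds the correct values.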
-#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) - -#define reduceRow(i, mask) \ - shuffleInc(i, 0, mask); \ - shuffleInc(i, 1, mask); \ - shuffleInc(i, 2, mask); \ - shuffleInc(i, 3, mask); \ - shuffleInc(i, 4, mask); \ - shuffleInc(i, 5, mask); \ - shuffleInc(i, 6, mask); \ - shuffleInc(i, 7, mask); \ - -#define reduceMatrix(mask) \ - reduceRow(0, mask); \ - reduceRow(1, mask); \ - reduceRow(2, mask); \ - reduceRow(3, mask); \ - reduceRow(4, mask); \ - reduceRow(5, mask); \ - reduceRow(6, mask); \ - reduceRow(7, mask); \ - - // actually perform the reduction, now each thread of index (_, y, z) - // contains the correct values in its registers that belong in the output - // block - reduceMatrix(1); - reduceMatrix(2); - reduceMatrix(4); - -#undef shuffleInc -#undef reduceRow -#undef reduceMatrix - - // now we need to copy the 64 values into main memory. We can't split work - // among threads because all variables are in registers. There's 2 ways - // to do this: - // (1) have 1 thread do 64 writes from registers into global memory - // (2) have 1 thread do 64 writes into shared memory, and then 8 threads - // each do 8 writes into global memory. We can just overwrite the shared - // memory from the problem we just solved. - // (2) is slightly faster than (1) due to less branching and more ILP - - // TODO: won't yield much gain, but could just use currently unused shared mem - // and then we won't have to sync - // wait for shared mem to be out of use - __syncthreads(); - -#define writeResultShmem(i, j) \ - lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \ - -#define writeRow(i) \ - writeResultShmem(i, 0); \ - writeResultShmem(i, 1); \ - writeResultShmem(i, 2); \ - writeResultShmem(i, 3); \ - writeResultShmem(i, 4); \ - writeResultShmem(i, 5); \ - writeResultShmem(i, 6); \ - writeResultShmem(i, 7); \ - - if (threadIdx.x == 0) { - writeRow(0); - writeRow(1); - writeRow(2); - writeRow(3); - writeRow(4); - writeRow(5); - writeRow(6); - writeRow(7); - } -#undef writeResultShmem -#undef writeRow - - const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); - const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); - - if (threadIdx.x < max_i_write) { - if (max_j_write == 8) { - // TODO: can i trade bank conflicts for coalesced writes? 
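// Comment added for clarity (not part of the original source): in the fast path below,
// thread x of each (y, z) group re-reads the res(x, 0..7) values that the x == 0 thread
// staged in shared memory above, and writes them to output row
// base_m + threadIdx.y + 8 * threadIdx.x, columns base_n + threadIdx.z + 8 * j for
// j = 0..7, i.e. the eight values written by one thread land 8 columns apart rather
// than in adjacent columns.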
- Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0]; - Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1]; - Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2]; - Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3]; - Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4]; - Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5]; - Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6]; - Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7]; - - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; - } else { -#pragma unroll 7 - for (int j = 0; j < max_j_write; j++) { - Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j]; - output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; - } - } - } -#undef res -} - - -template<typename Scalar, typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(512) -EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ Scalar lhs_shmem[72 * 64]; - __shared__ Scalar rhs_shmem[72 * 64]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - if (base_m + 63 < m_size && base_n + 63 < n_size) { - EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); - } else { - EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY, - bool CHECK_RHS_BOUNDARY> -__device__ EIGEN_STRONG_INLINE void -EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, float2 lhs_shmem2[][16], - float2 rhs_shmem2[][8], const Index m_size, - const Index n_size, const Index k_size, - const Index base_m, const Index base_n) { - typedef float Scalar; - - // prefetch registers - float4 lhs_pf0, rhs_pf0; - - float4 results[4]; - for (int i=0; i < 4; i++) { - results[i].x = results[i].y = results[i].z = results[i].w = 0; - } - - -#define prefetch_lhs(reg, row, col) \ - if (!CHECK_LHS_BOUNDARY) { \ - if (col < k_size) { \ - reg =lhs.loadPacket<Unaligned>(row, col); \ - } \ - } else { \ - if (col < k_size) { \ - if (row + 3 < m_size) { \ - reg =lhs.loadPacket<Unaligned>(row, col); \ - } else if (row + 2 < m_size) { 
\ - reg.x =lhs(row + 0, col); \ - reg.y =lhs(row + 1, col); \ - reg.z =lhs(row + 2, col); \ - } else if (row + 1 < m_size) { \ - reg.x =lhs(row + 0, col); \ - reg.y =lhs(row + 1, col); \ - } else if (row < m_size) { \ - reg.x =lhs(row + 0, col); \ - } \ - } \ - } \ - - - Index lhs_vert = base_m+threadIdx.x*4; - - for (Index k = 0; k < k_size; k += 16) { - lhs_pf0 = internal::pset1<float4>(0); - rhs_pf0 = internal::pset1<float4>(0); - - Index lhs_horiz = threadIdx.y+k; - prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz) - - Index rhs_vert = k+(threadIdx.x%4)*4; - Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; - - if (!CHECK_RHS_BOUNDARY) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if (rhs_vert + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } else { - if (rhs_horiz0 < n_size) { - if ((rhs_vert + 3) < k_size) { - rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0); - } else if ((rhs_vert + 2) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if ((rhs_vert + 1) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } - } - float x1, x2 ; - // the following can be a bitwise operation..... some day. - if((threadIdx.x%8) < 4) { - x1 = rhs_pf0.y; - x2 = rhs_pf0.w; - } else { - x1 = rhs_pf0.x; - x2 = rhs_pf0.z; - } - x1 = __shfl_xor(x1, 4); - x2 = __shfl_xor(x2, 4); - if((threadIdx.x%8) < 4) { - rhs_pf0.y = x1; - rhs_pf0.w = x2; - } else { - rhs_pf0.x = x1; - rhs_pf0.z = x2; - } - - // We have 64 features. - // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1. - // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3. - // ... - // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63 - // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1 - // ... - rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y); - rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w); - - // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // ... - // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) - // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) - // ... 
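// Worked example added for clarity (not in the original source): in the stores below,
// the thread with threadIdx.x == 1, threadIdx.y == 0 loaded the float4 covering rows
// base_m + 4 .. base_m + 7 of column k + 0 (time 0 within this 16-wide k slice), so it
// contributes features (4, 5) to lhs_shmem2[0][1] and features (6, 7) to
// lhs_shmem2[16][1], matching the row layout listed above.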
- - lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y); - lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w); - - -#define add_vals(fl1, fl2, fr1, fr2)\ - results[0].x += fl1.x * fr1.x;\ - results[0].y += fl1.y * fr1.x;\ - results[0].z += fl2.x * fr1.x;\ - results[0].w += fl2.y * fr1.x;\ -\ - results[1].x += fl1.x * fr1.y;\ - results[1].y += fl1.y * fr1.y;\ - results[1].z += fl2.x * fr1.y;\ - results[1].w += fl2.y * fr1.y;\ -\ - results[2].x += fl1.x * fr2.x;\ - results[2].y += fl1.y * fr2.x;\ - results[2].z += fl2.x * fr2.x;\ - results[2].w += fl2.y * fr2.x;\ -\ - results[3].x += fl1.x * fr2.y;\ - results[3].y += fl1.y * fr2.y;\ - results[3].z += fl2.x * fr2.y;\ - results[3].w += fl2.y * fr2.y;\ - - __syncthreads(); - - // Do the multiplies. - #pragma unroll - for (int koff = 0; koff < 16; koff ++) { - // 32 x threads. - float2 fl1 = lhs_shmem2[koff][threadIdx.x]; - float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x]; - - int start_feature = threadIdx.y * 4; - float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; - float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; - - add_vals(fl1, fl2, fr1, fr2) - } - __syncthreads(); - } - -#undef prefetch_lhs -#undef add_vals - - Index horiz_base = threadIdx.y*4+base_n; - if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (!CHECK_RHS_BOUNDARY) { - // CHECK LHS - if (lhs_vert + 3 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (lhs_vert + 2 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - } - } else if (lhs_vert + 1 < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - } - } else if (lhs_vert < m_size) { - for (int i = 0; i < 4; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - } - } - } else if (!CHECK_LHS_BOUNDARY) { - // CHECK RHS - /* - int ncols_rem = fminf(n_size- horiz_base, 4); - for (int i = 0; i < ncols_rem; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - }*/ - for (int i = 0; i < 4; i++) { - if (horiz_base+i < n_size) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } else { - // CHECK both boundaries. 
- for (int i = 0; i < 4; i++) { - if (horiz_base+i < n_size) { - if (lhs_vert < m_size) - output(lhs_vert, horiz_base + i) = results[i].x; - if (lhs_vert + 1 < m_size) - output(lhs_vert + 1, horiz_base + i) = results[i].y; - if (lhs_vert + 2 < m_size) - output(lhs_vert + 2, horiz_base + i) = results[i].z; - if (lhs_vert + 3 < m_size) - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY, - bool CHECK_RHS_BOUNDARY> -__device__ EIGEN_STRONG_INLINE void -EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, float2 lhs_shmem2[][32], - float2 rhs_shmem2[][8], const Index m_size, - const Index n_size, const Index k_size, - const Index base_m, const Index base_n) { - typedef float Scalar; - - // prefetch registers - float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3; - float4 rhs_pf0, rhs_pf1; - - float4 results[8]; - for (int i=0; i < 8; i++) { - results[i].x = results[i].y = results[i].z = results[i].w = 0; - } - - - Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32; - for (Index k = 0; k < k_size; k += 32) { - lhs_pf0 = internal::pset1<float4>(0); - lhs_pf1 = internal::pset1<float4>(0); - lhs_pf2 = internal::pset1<float4>(0); - lhs_pf3 = internal::pset1<float4>(0); - - rhs_pf0 = internal::pset1<float4>(0); - rhs_pf1 = internal::pset1<float4>(0); - - if (!CHECK_LHS_BOUNDARY) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - } - } else { - // just CHECK_LHS_BOUNDARY - if (lhs_vert + 3 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16)); - lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k)); - } - } else if (lhs_vert + 2 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, 
(threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); - lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); - } - } else if (lhs_vert + 1 < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); - } - } else if (lhs_vert < m_size) { - if ((threadIdx.y/4+k+24) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); - } else if ((threadIdx.y/4+k+16) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); - } else if ((threadIdx.y/4+k+8) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); - } else if ((threadIdx.y/4+k) < k_size) { - lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); - } - } - } - 
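// Comment added for clarity (not part of the original source): the load ladder above
// degrades gracefully at the edges: full float4 packet loads of the k offsets +0, +8,
// +16, +24 when four rows are in range, per-component scalar loads when only 1 to 3
// rows remain, and anything past the m or k edge keeps the pset1(0) value it was
// initialized with at the top of the loop, so it adds nothing to the accumulators.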
__syncthreads(); - Index rhs_vert = k+threadIdx.x*4; - Index rhs_horiz0 = threadIdx.y*2+base_n; - Index rhs_horiz1 = threadIdx.y*2+1+base_n; - if (!CHECK_RHS_BOUNDARY) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); - } else if (rhs_vert + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - } - } else { - if (rhs_horiz1 < n_size) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0); - rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1); - } else if (rhs_vert + 2 < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); - } else if (k+threadIdx.x*4 + 1 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); - } else if (k+threadIdx.x*4 < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); - } - } else if (rhs_horiz0 < n_size) { - if ((rhs_vert + 3) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0); - } else if ((rhs_vert + 2) < k_size) { - // just CHECK_RHS_BOUNDARY - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); - } else if ((rhs_vert + 1) < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); - } else if (rhs_vert < k_size) { - rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); - } - } - } - __syncthreads(); - // Loaded. Do computation - // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1. - // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3. - // .. - // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63 - rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x); - // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1. - // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3. - // .. - rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y); - // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1. - // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3. - rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z); - // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1. - // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3. - rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w); - - // LHS. - // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) - // Row 1 (time 1) -> features (0, 1), (4, 5), .. 
(28, 29), (32, 33), .. (60, 61) .. (124, 125) - // ... - // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) - // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) - - -#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\ - results[0].x += a_feat1.x * f1.x;\ - results[1].x += a_feat1.x * f1.y;\ - results[2].x += a_feat1.x * f2.x;\ - results[3].x += a_feat1.x * f2.y;\ - results[4].x += a_feat1.x * f3.x;\ - results[5].x += a_feat1.x * f3.y;\ - results[6].x += a_feat1.x * f4.x;\ - results[7].x += a_feat1.x * f4.y;\ -\ - results[0].y += a_feat1.y * f1.x;\ - results[1].y += a_feat1.y * f1.y;\ - results[2].y += a_feat1.y * f2.x;\ - results[3].y += a_feat1.y * f2.y;\ - results[4].y += a_feat1.y * f3.x;\ - results[5].y += a_feat1.y * f3.y;\ - results[6].y += a_feat1.y * f4.x;\ - results[7].y += a_feat1.y * f4.y;\ -\ - results[0].z += a_feat2.x * f1.x;\ - results[1].z += a_feat2.x * f1.y;\ - results[2].z += a_feat2.x * f2.x;\ - results[3].z += a_feat2.x * f2.y;\ - results[4].z += a_feat2.x * f3.x;\ - results[5].z += a_feat2.x * f3.y;\ - results[6].z += a_feat2.x * f4.x;\ - results[7].z += a_feat2.x * f4.y;\ -\ - results[0].w += a_feat2.y * f1.x;\ - results[1].w += a_feat2.y * f1.y;\ - results[2].w += a_feat2.y * f2.x;\ - results[3].w += a_feat2.y * f2.y;\ - results[4].w += a_feat2.y * f3.x;\ - results[5].w += a_feat2.y * f3.y;\ - results[6].w += a_feat2.y * f4.x;\ - results[7].w += a_feat2.y * f4.y;\ - - lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y); - lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y); - lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y); - lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y); - - lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w); - lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w); - lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w); - lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w); - - __syncthreads(); - - // Do the multiplies. - #pragma unroll - for (int koff = 0; koff < 32; koff ++) { - float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8]; - float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8]; - - // first feature is at (threadIdx.y/4) * 8 last is at start + 8. 
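// Added for clarity (not in the original source): each float2 read below holds two
// consecutive features, so br1..br4 together cover the eight features start_feature
// .. start_feature + 7 that this thread accumulates into results[0..7].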
- int start_feature = (threadIdx.y / 4) * 8; - - float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4]; - float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4]; - float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4]; - float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4]; - - add_vals(a3, a4, br1, br2, br3, br4) - } - __syncthreads(); - } // end loop over k - - - __syncthreads(); - Index horiz_base = (threadIdx.y/4)*8+base_n; - if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (!CHECK_RHS_BOUNDARY) { - if (lhs_vert + 3 < m_size) { - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } else if (lhs_vert + 2 < m_size) { - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - } - } else if (lhs_vert + 1 < m_size) { - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - } - } else if (lhs_vert < m_size) { - for (int i = 0; i < 8; i++) { - output(lhs_vert, horiz_base + i) = results[i].x; - } - } - } else if (!CHECK_LHS_BOUNDARY) { - // CHECK BOUNDARY_B - for (int i = 0; i < 8; i++) { - if (horiz_base + i < n_size) { - output(lhs_vert, horiz_base + i) = results[i].x; - output(lhs_vert + 1, horiz_base + i) = results[i].y; - output(lhs_vert + 2, horiz_base + i) = results[i].z; - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } else { - // CHECK both boundaries. 
- for (int i = 0; i < 8; i++) { - if (horiz_base + i < n_size) { - if (lhs_vert < m_size) - output(lhs_vert, horiz_base + i) = results[i].x; - if (lhs_vert + 1 < m_size) - output(lhs_vert + 1, horiz_base + i) = results[i].y; - if (lhs_vert + 2 < m_size) - output(lhs_vert + 2, horiz_base + i) = results[i].z; - if (lhs_vert + 3 < m_size) - output(lhs_vert + 3, horiz_base + i) = results[i].w; - } - } - } -} - - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(256) -EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ float2 lhs_shmem[64*32]; - __shared__ float2 rhs_shmem[128*8]; - - typedef float2 LHS_MEM[64][32]; - typedef float2 RHS_MEM[128][8]; - - typedef float2 LHS_MEM16x16[32][16]; - typedef float2 RHS_MEM16x16[64][8]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 128 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - bool check_rhs = (base_n + 63) >= n_size; - bool check_lhs128 = (base_m + 127) >= m_size; - - if (!check_rhs) { - if (!check_lhs128) { - // >= 128 rows left - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, false>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, false>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } - } else { - if (!check_lhs128) { - // >= 128 rows left - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, true>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, true>( - lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); - } - } -} - -template<typename Index, typename LhsMapper, - typename RhsMapper, typename OutputMapper> -__global__ void -__launch_bounds__(256) -EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, - const OutputMapper output, - const Index m_size, const Index n_size, const Index k_size) { - __shared__ float2 lhs_shmem[32][16]; - __shared__ float2 rhs_shmem[64][8]; - - const Index m_block_idx = blockIdx.x; - const Index n_block_idx = blockIdx.y; - - const Index base_m = 64 * m_block_idx; - const Index base_n = 64 * n_block_idx; - - if (base_m + 63 < m_size) { - if (base_n + 63 < n_size) { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } - } else { - if (base_n + 63 < n_size) { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); - } else { - EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, 
n_size, k_size, base_m, base_n); - } - } -} - - -template<typename Indices, typename LeftArgType, typename RightArgType> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> : - public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > { - - typedef GpuDevice Device; - - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef DSizes<Index, NumDims> Dimensions; - - // typedefs needed in evalTo - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - typedef typename LeftEvaluator::Dimensions LeftDimensions; - typedef typename RightEvaluator::Dimensions RightDimensions; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} - - // We need to redefine this method to make nvcc happy - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - this->m_leftImpl.evalSubExprsIfNeeded(NULL); - this->m_rightImpl.evalSubExprsIfNeeded(NULL); - if (data) { - evalTo(data); - return false; - } else { - this->m_result = static_cast<Scalar *>(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); - evalTo(this->m_result); - return true; - } - } - - void evalTo(Scalar* buffer) const { - if (this->m_lhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<true, true, true, Unaligned>(buffer); - } - else { - evalTyped<true, true, false, 
Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<true, false, true, Unaligned>(buffer); - } - else { - evalTyped<true, false, false, Unaligned>(buffer); - } - } - } - else { - if (this->m_rhs_inner_dim_contiguous) { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<false, true, true, Unaligned>(buffer); - } - else { - evalTyped<false, true, false, Unaligned>(buffer); - } - } - else { - if (this->m_rhs_inner_dim_reordered) { - evalTyped<false, false, true, Unaligned>(buffer); - } - else { - evalTyped<false, false, false, Unaligned>(buffer); - } - } - } - } - - template <typename LhsScalar, typename RhsScalar, typename Index, typename LhsMapper, typename RhsMapper, typename OutputMapper> struct LaunchKernels { - static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { - const Index m_blocks = (m + 63) / 64; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(8, 8, 8); - LAUNCH_CUDA_KERNEL((EigenContractionKernel<Scalar, Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); - } - }; - - template <typename Index, typename LhsMapper, typename RhsMapper, typename OutputMapper> struct LaunchKernels<float, float, Index, LhsMapper, RhsMapper, OutputMapper> { - static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { - if (m < 768 || n < 768) { - const Index m_blocks = (m + 63) / 64; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(16, 16, 1); - LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); - } else { - const Index m_blocks = (m + 127) / 128; - const Index n_blocks = (n + 63) / 64; - const dim3 num_blocks(m_blocks, n_blocks, 1); - const dim3 block_size(8, 32, 1); - LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); - } - } - }; - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalTyped(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - EIGEN_UNUSED_VARIABLE(k) - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, 4, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, 4, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - 
this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte); - LaunchKernels<LhsScalar, RhsScalar, Index, LhsMapper, RhsMapper, OutputMapper>::Run(lhs, rhs, output, m, n, k, this->m_device); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_USE_GPU and __CUDACC__ -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h deleted file mode 100644 index 9b2cb3f..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h +++ /dev/null @@ -1,467 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H - -namespace Eigen { - -namespace internal { - -enum { - Rhs = 0, - Lhs = 1 -}; - -/* - * Implementation of the Eigen blas_data_mapper class for tensors. - */ - -template <typename Tensor, bool HasRawAccess> struct CoeffLoader { - enum { - DirectOffsets = false - }; - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) { - eigen_assert(false && "unsupported"); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename Tensor::PacketReturnType packet(typename Tensor::Index index) const - { - return m_tensor.template packet<LoadMode>(index); - } - - - private: - const Tensor m_tensor; -}; - -template <typename Tensor> struct CoeffLoader<Tensor, true> { - enum { - DirectOffsets = true - }; - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {} - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { - m_data += offset; - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - typename Tensor::PacketReturnType packet(typename Tensor::Index index) const - { - return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index); - } - private: - typedef typename Tensor::Scalar Scalar; - const Scalar* m_data; -}; - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, bool inner_dim_contiguous, int Alignment> -class SimpleTensorContractionMapper { - public: - EIGEN_DEVICE_FUNC - SimpleTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - m_tensor(tensor), - m_nocontract_strides(nocontract_strides), - m_ij_strides(ij_strides), - m_contract_strides(contract_strides), - m_k_strides(k_strides) { } - - enum { - DirectOffsets = CoeffLoader<Tensor, 
Tensor::RawAccess>::DirectOffsets - }; - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { - m_tensor.offsetBuffer(offset); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row) const { - // column major assumption - return operator()(row, 0); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { - return m_tensor.coeff(computeIndex(row, col)); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { - const bool left = (side == Lhs); - Index nocontract_val = left ? row : col; - Index linidx = 0; - for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { - const Index idx = nocontract_val / m_ij_strides[i]; - linidx += idx * m_nocontract_strides[i]; - nocontract_val -= idx * m_ij_strides[i]; - } - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx += nocontract_val; - } else { - linidx += nocontract_val * m_nocontract_strides[0]; - } - } - - Index contract_val = left ? col : row; - if(array_size<contract_t>::value > 0) { - for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { - const Index idx = contract_val / m_k_strides[i]; - linidx += idx * m_contract_strides[i]; - contract_val -= idx * m_k_strides[i]; - } - - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx += contract_val; - } else { - linidx += contract_val * m_contract_strides[0]; - } - } - - return linidx; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const { - const bool left = (side == Lhs); - Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; - Index linidx[2] = {0, 0}; - if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) { - for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) { - const Index idx0 = nocontract_val[0] / m_ij_strides[i]; - const Index idx1 = nocontract_val[1] / m_ij_strides[i]; - linidx[0] += idx0 * m_nocontract_strides[i]; - linidx[1] += idx1 * m_nocontract_strides[i]; - nocontract_val[0] -= idx0 * m_ij_strides[i]; - nocontract_val[1] -= idx1 * m_ij_strides[i]; - } - if (side == Lhs && inner_dim_contiguous) { - eigen_assert(m_nocontract_strides[0] == 1); - linidx[0] += nocontract_val[0]; - linidx[1] += nocontract_val[1]; - } else { - linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; - linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; - } - } - - Index contract_val[2] = {left ? col : row, left ? 
col : row + distance}; - if (array_size<contract_t>::value> 0) { - for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) { - const Index idx0 = contract_val[0] / m_k_strides[i]; - const Index idx1 = contract_val[1] / m_k_strides[i]; - linidx[0] += idx0 * m_contract_strides[i]; - linidx[1] += idx1 * m_contract_strides[i]; - contract_val[0] -= idx0 * m_k_strides[i]; - contract_val[1] -= idx1 * m_k_strides[i]; - } - - if (side == Rhs && inner_dim_contiguous) { - eigen_assert(m_contract_strides[0] == 1); - linidx[0] += contract_val[0]; - linidx[1] += contract_val[1]; - } else { - linidx[0] += contract_val[0] * m_contract_strides[0]; - linidx[1] += contract_val[1] * m_contract_strides[0]; - } - } - return IndexPair<Index>(linidx[0], linidx[1]); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const { - // Only claim alignment when we can compute the actual stride (ie when we're - // dealing with the lhs with inner_dim_contiguous. This is because the - // matrix-vector product relies on the stride when dealing with aligned inputs. - return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { - return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1; - } - - protected: - CoeffLoader<Tensor, Tensor::RawAccess> m_tensor; - const nocontract_t m_nocontract_strides; - const nocontract_t m_ij_strides; - const contract_t m_contract_strides; - const contract_t m_k_strides; -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> -class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename Tensor::PacketReturnType Packet; - typedef typename unpacket_traits<Packet>::half HalfPacket; - - template <int AlignmentType> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - // current code assumes packet size must be a multiple of 2 - EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - - if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { - const Index index = this->computeIndex(i, j); - eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); - return this->m_tensor.template packet<AlignmentType>(index); - } - - const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1); - const Index first = indexPair.first; - const Index last = indexPair.second; - - // We can always do optimized packet reads from left hand side right now, because - // the vertical matrix dimension on the left hand side is never contracting. 
- // On the right hand side we need to check if the contracting dimensions may have - // been shuffled first. - if (Tensor::PacketAccess && - (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) && - (last - first) == (packet_size - 1)) { - - return this->m_tensor.template packet<AlignmentType>(first); - } - - EIGEN_ALIGN_MAX Scalar data[packet_size]; - - data[0] = this->m_tensor.coeff(first); - for (Index k = 1; k < packet_size - 1; k += 2) { - const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1); - data[k] = this->m_tensor.coeff(internal_pair.first); - data[k + 1] = this->m_tensor.coeff(internal_pair.second); - } - data[packet_size - 1] = this->m_tensor.coeff(last); - - return pload<Packet>(data); - } - - template <int AlignmentType> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - // whole method makes column major assumption - - // don't need to add offsets for now (because operator handles that) - const Index half_packet_size = unpacket_traits<HalfPacket>::size; - if (half_packet_size == packet_size) { - return loadPacket<AlignmentType>(i, j); - } - EIGEN_ALIGN_MAX Scalar data[half_packet_size]; - for (Index k = 0; k < half_packet_size; k++) { - data[k] = operator()(i + k, j); - } - return pload<HalfPacket>(data); - } -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - bool inner_dim_contiguous, - bool inner_dim_reordered, int Alignment> -class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> -{ - public: - typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> ParentMapper; - - EIGEN_DEVICE_FUNC - BaseTensorContractionMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) : - ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - typedef typename Tensor::PacketReturnType Packet; - template <int> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { - EIGEN_ALIGN_MAX Scalar data[1]; - data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); - return pload<typename Tensor::PacketReturnType>(data); - } - template <int> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { - return loadPacket(i, j); - } -}; - - -template<typename Scalar, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionSubMapper { - public: - typedef typename Tensor::PacketReturnType Packet; - typedef typename unpacket_traits<Packet>::half HalfPacket; - - typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; - typedef Self LinearMapper; - - enum { - // We can use direct offsets iff the parent mapper supports then 
and we can compute the strides. - // TODO: we should also enable direct offsets for the Rhs case. - UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0) - }; - - EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { - // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute - // this offset every time we attempt to access a coefficient. - if (UseDirectOffsets) { - Index stride = m_base_mapper.stride(); - m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride); - } - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - if (UseDirectOffsets) { - return m_base_mapper(i, 0); - } - return m_base_mapper(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - if (UseDirectOffsets) { - return m_base_mapper(i, j); - } - return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<Alignment>(i, 0); - } - return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<Alignment>(i, j); - } - return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - if (UseDirectOffsets) { - return m_base_mapper.template loadHalfPacket<Alignment>(i, 0); - } - return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { - if (UseDirectOffsets) { - m_base_mapper.storePacket(i, 0, p); - } - m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - if (UseDirectOffsets) { - return LinearMapper(m_base_mapper, i, j); - } - return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); - } - - template <typename PacketT, int AlignmentType> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { - EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? 
Aligned : Unaligned; - if (UseDirectOffsets) { - return m_base_mapper.template loadPacket<ActualAlignment>(i, 0); - } - return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset); - } - - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { - return false; - } - - private: - ParentMapper m_base_mapper; - const Index m_vert_offset; - const Index m_horiz_offset; -}; - - -template<typename Scalar_, typename Index, int side, - typename Tensor, - typename nocontract_t, typename contract_t, - int packet_size, - bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> -class TensorContractionInputMapper - : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { - - public: - typedef Scalar_ Scalar; - typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base; - typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; - typedef SubMapper VectorMapper; - - EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, - const nocontract_t& nocontract_strides, - const nocontract_t& ij_strides, - const contract_t& contract_strides, - const contract_t& k_strides) - : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { - return VectorMapper(*this, i, j); - } -}; - - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h deleted file mode 100644 index c70dea0..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ /dev/null @@ -1,1043 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
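The sub-mapper defined above bakes its vertical and horizontal offsets into the underlying buffer when UseDirectOffsets holds, so every later coefficient or packet access uses plain indices instead of re-adding the offsets. Below is a minimal standalone sketch of that idea; the FlatMapper struct and all numbers are hypothetical and are not the Eigen mapper API.

```cpp
#include <cstdio>

// Illustrative only: a column-major view over a flat buffer. When "direct offsets"
// are used, the (vert, horiz) offsets are folded into the base pointer once instead
// of being added on every access.
struct FlatMapper {
  const float* data;  // base pointer (already offset in the direct-offsets case)
  long stride;        // column stride of the underlying buffer
  float operator()(long i, long j) const { return data[i + j * stride]; }
};

int main() {
  float buf[6 * 4];
  for (int k = 0; k < 6 * 4; ++k) buf[k] = float(k);
  const long stride = 6, vert = 2, horiz = 1;

  FlatMapper base{buf, stride};
  float a = base(0 + vert, 0 + horiz);                    // offsets added per access
  FlatMapper sub{buf + vert + horiz * stride, stride};    // offsets baked in once
  float b = sub(0, 0);
  std::printf("%g %g\n", a, b);                           // identical values
  return 0;
}
```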
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H - -// evaluator for thread pool device -#ifdef EIGEN_USE_THREADS - -namespace Eigen { - -#ifdef EIGEN_USE_SIMPLE_THREAD_POOL -namespace internal { - -template<typename LhsScalar, typename LhsMapper, typename Index> -struct packLhsArg { - LhsScalar* blockA; - const LhsMapper& lhs; - const Index m_start; - const Index k_start; - const Index mc; - const Index kc; -}; - -template<typename LhsScalar, typename RhsScalar, typename RhsMapper, typename OutputMapper, typename Index> -struct packRhsAndKernelArg { - const MaxSizeVector<LhsScalar*>* blockAs; - RhsScalar* blockB; - const RhsMapper& rhs; - OutputMapper& output; - const Index m; - const Index k; - const Index n; - const Index mc; - const Index kc; - const Index nc; - const Index num_threads; - const Index num_blockAs; - const Index max_m; - const Index k_block_idx; - const Index m_block_idx; - const Index n_block_idx; - const Index m_blocks; - const Index n_blocks; - MaxSizeVector<Notification*>* kernel_notifications; - const MaxSizeVector<Notification*>* lhs_notifications; - const bool need_to_pack; -}; - -} // end namespace internal -#endif // EIGEN_USE_SIMPLE_THREAD_POOL - -template<typename Indices, typename LeftArgType, typename RightArgType> -struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> : - public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> > { - - typedef ThreadPoolDevice Device; - - typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; - typedef TensorContractionEvaluatorBase<Self> Base; - - typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - enum { - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - }; - - // Most of the code is assuming that both input tensors are ColMajor. If the - // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: - // If we want to compute A * B = C, where A is LHS and B is RHS, the code - // will pretend B is LHS and A is RHS. 
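The swap described in the comment above rests on a layout identity: a row-major matrix occupies the same memory as the column-major view of its transpose, and (A*B)^T = B^T * A^T, so computing A*B on row-major operands amounts to computing B^T * A^T on their column-major views. A small standalone sketch of that identity (illustrative only, not part of the deleted file):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  using RM = Eigen::Matrix<float, 2, 3, Eigen::RowMajor>;
  using CM = Eigen::Matrix<float, 3, 2, Eigen::ColMajor>;
  RM a;
  a << 1, 2, 3,
       4, 5, 6;
  // Reinterpret the same buffer: the column-major view of a's storage is a^T.
  Eigen::Map<CM> at(a.data());
  std::cout << (at - a.transpose()).norm() << "\n";  // prints 0
  return 0;
}
```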
- typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; - typedef typename internal::conditional< - static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; - - static const int LDims = - internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value; - static const int RDims = - internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value; - static const int ContractDims = internal::array_size<Indices>::value; - - typedef array<Index, LDims> left_dim_mapper_t; - typedef array<Index, RDims> right_dim_mapper_t; - - typedef array<Index, ContractDims> contract_t; - typedef array<Index, LDims - ContractDims> left_nocontract_t; - typedef array<Index, RDims - ContractDims> right_nocontract_t; - - static const int NumDims = LDims + RDims - 2 * ContractDims; - - typedef DSizes<Index, NumDims> Dimensions; - - // typedefs needed in evalTo - typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar; - typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar; - typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits; - - typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator; - typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator; - - TensorEvaluator(const XprType& op, const Device& device) : - Base(op, device) {} - -#ifndef EIGEN_USE_SIMPLE_THREAD_POOL - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, - bool rhs_inner_dim_reordered, int Alignment> - void evalProduct(Scalar* buffer) const { - typedef internal::TensorContractionInputMapper< - LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, - contract_t, internal::packet_traits<LhsScalar>::size, - lhs_inner_dim_contiguous, false, Unaligned> - LhsMapper; - typedef internal::TensorContractionInputMapper< - RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, - contract_t, internal::packet_traits<RhsScalar>::size, - rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> - RhsMapper; - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - typedef internal::gemm_pack_lhs<LhsScalar, Index, - typename LhsMapper::SubMapper, Traits::mr, - Traits::LhsProgress, ColMajor> - LhsPacker; - typedef internal::gemm_pack_rhs< - RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> - RhsPacker; - typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, - Traits::mr, Traits::nr, false, false> - GebpKernel; - - const Index m = this->m_i_size; - const Index n = this->m_j_size; - const Index k = this->m_k_size; - if (m == 0 || n == 0 || k == 0) return; - - // Compute a set of algorithm parameters: - // - kernel block sizes (bm, bn, bk) - // - task grain sizes (number of kernels executed per task: gm, gn) - // - number of threads - // - sharding by row/column - // - parallel packing or first lhs then rhs - // and some derived parameters: - // - number of tasks (nm, nn, nk) - // - number of kernels (nm0, nn0) - // Unfortunately, all these parameters are tightly interdependent. - // So in some cases we first compute approximate values, then compute other - // values based on these approximations and then refine the approximations. - - // There are lots of heuristics here. 
There is some reasoning behind them, - // but ultimately they are just tuned on contraction benchmarks for - // different input configurations, thread counts and instruction sets. - // So feel free to question any of them. - - // Compute whether we want to shard by row or by column. - // This is a first approximation, it will be refined later. Since we don't - // know number of threads yet we use 2, because what's we are most - // interested in at this point is whether it makes sense to use - // parallelization at all or not. - bool shard_by_col = shardByCol(m, n, 2); - - // First approximation of kernel blocking sizes. - // Again, we don't know number of threads yet, so we use 2. - Index bm, bn, bk; - if (shard_by_col) { - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, - internal::ShardByCol> - blocking(k, m, n, 2); - bm = blocking.mc(); - bn = blocking.nc(); - bk = blocking.kc(); - } else { - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, - internal::ShardByRow> - blocking(k, m, n, 2); - bm = blocking.mc(); - bn = blocking.nc(); - bk = blocking.kc(); - } - - // Compute optimal number of threads. - // Note: we use bk instead of k here because we are interested in amount of - // _parallelizable_ computations, and computations are not parallelizable - // across k dimension. - const TensorOpCost cost = - contractionCost(m, n, bm, bn, bk, shard_by_col, false); - int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads( - static_cast<double>(n) * m, cost, this->m_device.numThreads()); - - // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost - // model is not tuned. Remove this when the cost model is tuned. - if (n == 1) num_threads = 1; - - if (num_threads == 1) { - // The single-threaded algorithm should be faster in this case. - if (n == 1) - this->template evalGemv<lhs_inner_dim_contiguous, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Alignment>(buffer); - else - this->template evalGemm<lhs_inner_dim_contiguous, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Alignment>(buffer); - return; - } - - // Now that we know number of threads, recalculate sharding and blocking. - shard_by_col = shardByCol(m, n, num_threads); - if (shard_by_col) { - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, - internal::ShardByCol> - blocking(k, m, n, num_threads); - bm = blocking.mc(); - bn = blocking.nc(); - bk = blocking.kc(); - } else { - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, - internal::ShardByRow> - blocking(k, m, n, num_threads); - bm = blocking.mc(); - bn = blocking.nc(); - bk = blocking.kc(); - } - - // Number of kernels for each dimension. - Index nm0 = divup(m, bm); - Index nn0 = divup(n, bn); - Index nk = divup(k, bk); - - // Calculate task grain size (number of kernels executed per task). - // This task size coarsening serves two purposes: - // 1. It reduces per-task overheads including synchronization overheads. - // 2. It allows to use caches better (reuse the same packed rhs in several - // consecutive kernels). - Index gm = 1; - Index gn = 1; - // If we are sharding by column, then we prefer to reduce rows first. - if (shard_by_col) { - gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); - gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); - } else { - gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); - gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); - } - // Number of tasks in each dimension. 
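Before the code below derives the task counts, here is a tiny standalone illustration of the ceiling-division arithmetic used throughout this evaluator: block sizes turn the contraction sizes (m, n, k) into kernel counts (nm0, nn0, nk), and grain sizes coarsen those into task counts (nm, nn). All concrete numbers are made up for illustration.

```cpp
#include <cstdio>

static long divup(long x, long y) { return (x + y - 1) / y; }

int main() {
  const long m = 1000, n = 500, k = 300;   // contraction sizes (hypothetical)
  const long bm = 128, bn = 64, bk = 256;  // kernel block sizes (hypothetical)
  const long gm = 2, gn = 1;               // task grain sizes (hypothetical)
  const long nm0 = divup(m, bm), nn0 = divup(n, bn), nk = divup(k, bk);  // kernels
  const long nm = divup(nm0, gm), nn = divup(nn0, gn);                   // tasks
  std::printf("kernels: %ld x %ld x %ld, tasks per k slice: %ld x %ld\n",
              nm0, nn0, nk, nm, nn);
  return 0;
}
```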
- Index nm = divup(nm0, gm); - Index nn = divup(nn0, gn); - - // Last by not least, decide whether we want to issue both lhs and rhs - // packing in parallel; or issue lhs packing first, and then issue rhs - // packing when lhs packing completes (for !shard_by_col lhs and rhs are - // swapped). Parallel packing allows more parallelism (for both packing and - // kernels), while sequential packing provides better locality (once - // a thread finishes rhs packing it proceed to kernels with that rhs). - // First, we are interested in parallel packing if there are few tasks. - bool parallel_pack = num_threads >= nm * nn; - // Also do parallel packing if all data fits into L2$. - if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= - l2CacheSize() * num_threads) - parallel_pack = true; - // But don't do it if we will use each rhs only once. Locality seems to be - // more important in this case. - if ((shard_by_col ? nm : nn) == 1) parallel_pack = false; - - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, - this->m_i_strides, this->m_left_contracting_strides, - this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, - this->m_j_strides, this->m_right_contracting_strides, - this->m_k_strides); - - Context<LhsPacker, RhsPacker, GebpKernel, LhsMapper, RhsMapper, - OutputMapper>(this->m_device, num_threads, lhs, rhs, buffer, m, n, - k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0, - shard_by_col, parallel_pack) - .run(); - } - - // Context coordinates a single parallel gemm operation. - template <typename LhsPacker, typename RhsPacker, typename GebpKernel, - typename LhsMapper, typename RhsMapper, typename OutputMapper> - class Context { - public: - Context(const Device& device, int num_threads, LhsMapper& lhs, - RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm, - Index bn, Index bk, Index nm, Index nn, Index nk, Index gm, - Index gn, Index nm0, Index nn0, bool shard_by_col, - bool parallel_pack) - : device_(device), - lhs_(lhs), - rhs_(rhs), - buffer_(buffer), - output_(buffer, tm), - num_threads_(num_threads), - shard_by_col_(shard_by_col), - parallel_pack_(parallel_pack), - m_(tm), - n_(tn), - k_(tk), - bm_(bm), - bn_(bn), - bk_(bk), - nm_(nm), - nn_(nn), - nk_(nk), - gm_(gm), - gn_(gn), - nm0_(nm0), - nn0_(nn0) - { - for (Index x = 0; x < P; x++) { - // Normal number of notifications for k slice switch is - // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only - // nm_ + nn_ notifications, because they will not receive notifications - // from preceeding kernels. - state_switch_[x] = - x == 0 - ? 1 - : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) + - (x == P - 1 ? nm_ * nn_ : 0); - state_packing_ready_[x] = - parallel_pack_ ? 0 : (shard_by_col_ ? nm_ : nn_); - state_kernel_[x] = new std::atomic<uint8_t>*[nm_]; - for (Index m = 0; m < nm_; m++) { - state_kernel_[x][m] = new std::atomic<uint8_t>[nn_]; - // Kernels generally receive 3 notifications (previous kernel + 2 - // packing), but the first slice won't get notifications from previous - // kernels. - for (Index n = 0; n < nn_; n++) - state_kernel_[x][m][n].store( - (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1), - std::memory_order_relaxed); - } - } - - // Allocate memory for packed rhs/lhs matrices. 
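The allocation that follows sizes one arena for all packed lhs/rhs blocks of up to P-1 in-flight k slices, rounding each block size up to the alignment boundary before carving the arena into per-block pointers. A standalone sketch of that size arithmetic and carving, with made-up numbers; the real code uses internal::aligned_malloc rather than a std::vector.

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

static std::size_t divup(std::size_t x, std::size_t y) { return (x + y - 1) / y; }

int main() {
  const std::size_t align = 64;                  // stand-in for EIGEN_MAX_ALIGN_BYTES
  const std::size_t bm = 48, bn = 24, bk = 100;  // hypothetical block sizes
  const std::size_t nm0 = 5, nn0 = 7;            // hypothetical lhs/rhs block counts
  const std::size_t slices = 2;                  // min(nk, P - 1) with P == 3

  const std::size_t lhs_size = divup(bm * bk * sizeof(float), align) * align;
  const std::size_t rhs_size = divup(bn * bk * sizeof(float), align) * align;
  std::vector<char> arena((nm0 * lhs_size + nn0 * rhs_size) * slices);

  // Carve the arena the same way packed_lhs_/packed_rhs_ are filled:
  // lhs blocks first, then rhs blocks, repeated once per in-flight slice.
  std::vector<float*> lhs_blocks, rhs_blocks;
  char* mem = arena.data();
  for (std::size_t x = 0; x < slices; ++x) {
    for (std::size_t i = 0; i < nm0; ++i) {
      lhs_blocks.push_back(reinterpret_cast<float*>(mem));
      mem += lhs_size;
    }
    for (std::size_t j = 0; j < nn0; ++j) {
      rhs_blocks.push_back(reinterpret_cast<float*>(mem));
      mem += rhs_size;
    }
  }
  std::printf("arena: %zu bytes, %zu lhs blocks, %zu rhs blocks\n",
              arena.size(), lhs_blocks.size(), rhs_blocks.size());
  return 0;
}
```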
- size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); - size_t lhs_size = - divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align; - size_t rhs_size = - divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align; - packed_mem_ = static_cast<char*>(internal::aligned_malloc( - (nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1))); - char* mem = static_cast<char*>(packed_mem_); - for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) { - packed_lhs_[x].resize(nm0_); - for (Index m = 0; m < nm0_; m++) { - packed_lhs_[x][m] = reinterpret_cast<LhsScalar*>(mem); - mem += lhs_size; - } - packed_rhs_[x].resize(nn0_); - for (Index n = 0; n < nn0_; n++) { - packed_rhs_[x][n] = reinterpret_cast<RhsScalar*>(mem); - mem += rhs_size; - } - } - } - - ~Context() { - for (Index x = 0; x < P; x++) { - for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; - delete[] state_kernel_[x]; - } - internal::aligned_free(packed_mem_); - } - - void run() { - // Kick off packing of the first slice. - signal_switch(0, 1); - // Wait for overall completion. - // TODO(dvyukov): this wait can lead to deadlock. - // If nthreads contractions are concurrently submitted from worker - // threads, this wait will block all worker threads and the system will - // deadlock. - done_.Wait(); - } - - private: - Notification done_; - const Device& device_; - LhsMapper& lhs_; - RhsMapper& rhs_; - Scalar* const buffer_; - OutputMapper output_; - const int num_threads_; - const bool shard_by_col_; - const bool parallel_pack_; - // Matrix sizes. - const Index m_; - const Index n_; - const Index k_; - // Block sizes. - const Index bm_; - const Index bn_; - const Index bk_; - // Number of tasks. - const Index nm_; - const Index nn_; - const Index nk_; - // Task grain sizes (number of kernels executed per task). - const Index gm_; - const Index gn_; - // Number of blocks (this is different from ni_/nn_ because of task size - // coarsening). - const Index nm0_; - const Index nn0_; - - // Parallelization strategy. - // - // Blocks related to the same k block can run in parallel because they write - // to different output blocks. So we parallelize within k slices, this - // gives us parallelism level of m x n. Before we can start any kernels - // related to k-th slice, we need to issue m lhs packing tasks and n rhs - // packing tasks. - // - // However, there is a bottleneck when we are finishing kernels for k-th - // slice (at the very end there is only 1 runnable kernel). To mitigate this - // bottleneck we allow kernels from k-th and k+1-th slices to run in - // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same - // output block, so they must not run in parallel. - // - // This gives us the following dependency graph. - // On each k slice we have m x n kernel tasks, m lhs paking tasks and n rhs - // packing tasks. - // Kernel (m, n, k) can start when: - // - kernel (m, n, k-1) has finished - // - lhs packing (m, k) has finished - // - rhs packing (n, k) has finished - // Lhs/rhs packing can start when: - // - all k-1 packing has finished (artificially imposed to limit amount of - // parallel packing) - // - // On top of that we limit runnable tasks to two consecutive k slices. - // This is done to limit amount of memory we need for packed lhs/rhs - // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_). - // - // state_switch_ tracks when we are ready to switch to the next k slice. - // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n). 
- // These variable are rolling over 3 consecutive k slices: first two we are - // actively executing + one to track completion of kernels in the second - // slice. - static const Index P = 3; - void* packed_mem_; - std::vector<LhsScalar*> packed_lhs_[P - 1]; - std::vector<RhsScalar*> packed_rhs_[P - 1]; - std::atomic<uint8_t>** state_kernel_[P]; - // state_switch_ is frequently modified by worker threads, while other - // fields are read-only after constructor. Let's move it to a separate cache - // line to reduce cache-coherency traffic. - char pad_[128]; - std::atomic<Index> state_packing_ready_[P]; - std::atomic<Index> state_switch_[P]; - - void pack_lhs(Index m, Index k) { - const Index mend = m * gm_ + gm(m); - for (Index m1 = m * gm_; m1 < mend; m1++) - LhsPacker()(packed_lhs_[k % (P - 1)][m1], - lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1)); - - if (!parallel_pack_ && shard_by_col_) { - signal_packing(k); - } else { - signal_switch(k + 1); - for (Index n = nn_ - 1; n >= 0; n--) signal_kernel(m, n, k, n == 0); - } - } - - void pack_rhs(Index n, Index k) { - const Index nend = n * gn_ + gn(n); - for (Index n1 = n * gn_; n1 < nend; n1++) { - if (k == 0) { - // Zero the output memory in parallel. - // On 10000x2x10000 mm zeroing can easily take half of time. - // Zero (bn x m) row. Safe to do here because all kernels that will - // write to this memory depend on completion of this task. - // Note: don't call device_.memset() here. device_.memset() blocks on - // thread pool worker thread, which can lead to underutilization and - // deadlocks. - memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar)); - } - RhsPacker()(packed_rhs_[k % (P - 1)][n1], - rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1)); - } - - if (parallel_pack_ || shard_by_col_) { - signal_switch(k + 1); - for (Index m = nm_ - 1; m >= 0; m--) signal_kernel(m, n, k, m == 0); - } else { - signal_packing(k); - } - } - - void kernel(Index m, Index n, Index k) { - // Note: order of iteration matters here. Iteration over m is innermost - // because we want to reuse the same packed rhs in consequetive tasks - // (rhs fits into L2$ while lhs only into L3$). - const Index nend = n * gn_ + gn(n); - const Index mend = m * gm_ + gm(m); - if (shard_by_col_) { - for (Index n1 = n * gn_; n1 < nend; n1++) { - for (Index m1 = m * gm_; m1 < mend; m1++) - GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), - packed_lhs_[k % (P - 1)][m1], - packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), - Scalar(1), -1, -1, 0, 0); - } - } else { - for (Index m1 = m * gm_; m1 < mend; m1++) - for (Index n1 = n * gn_; n1 < nend; n1++) { - GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), - packed_lhs_[k % (P - 1)][m1], - packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), - Scalar(1), -1, -1, 0, 0); - } - } - signal_kernel(m, n, k + 1, false); - signal_switch(k + 2); - } - - void signal_packing(Index k) { - eigen_assert(!parallel_pack_); - Index s = state_packing_ready_[k % P].fetch_sub(1); - eigen_assert(s > 0); - if (s != 1) return; - state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_; - enqueue_packing(k, shard_by_col_); - } - - void signal_kernel(Index m, Index n, Index k, bool sync) { - std::atomic<uint8_t>* state = &state_kernel_[k % P][m][n]; - Index s = state->load(); - eigen_assert(s > 0); - if (s != 1 && state->fetch_sub(1) != 1) return; - state->store(parallel_pack_ ? 
3 : 2, std::memory_order_relaxed); - if (sync) - kernel(m, n, k); - else - device_.enqueueNoNotification([=]() { kernel(m, n, k); }); - } - - void signal_switch(Index k, Index v = 1) { - Index s = state_switch_[k % P].fetch_sub(v); - eigen_assert(s >= v); - if (s != v) return; - - // Ready to switch to the next k slice. - // Reset counter for the next iteration. - state_switch_[k % P] = - (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) + - nm_ * nn_; - if (k < nk_) { - // Issue lhs/rhs packing. Their completion will in turn kick off - // kernels. - if (parallel_pack_) { - enqueue_packing(k, !shard_by_col_); - enqueue_packing(k, shard_by_col_); - } else if (shard_by_col_) { - enqueue_packing(k, false); - } else { - enqueue_packing(k, true); - } - - // Termination handling. - // Because kernel completion signals k + 2 switch, we need to finish nk - // + 2 slices without issuing any tasks on nk + 1 slice. So here we - // pretend that all nk + 1 packing tasks just finish instantly; so that - // nk + 2 switch only waits for completion of nk kernels. - } else if (k == nk_) { - signal_switch(k + 1, - parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)); - } else { - done_.Notify(); - } - } - - // Enqueue all rhs/lhs packing for k-th slice. - void enqueue_packing(Index k, bool rhs) { - enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs); - } - - void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { - if (end - start == 1) { - if (rhs) - pack_rhs(start, k); - else - pack_lhs(start, k); - } else { - Index mid = (start + end) / 2; - device_.enqueueNoNotification( - [=]() { enqueue_packing_helper(mid, end, k, rhs); }); - device_.enqueueNoNotification( - [=]() { enqueue_packing_helper(start, mid, k, rhs); }); - } - } - - // Block sizes with accounting for potentially incomplete last block. - Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; } - Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; } - Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; } - // Task grain sizes accounting for potentially incomplete last task. - Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; } - Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; } - - Context(const Context&) = delete; - void operator=(const Context&) = delete; - }; - - // Decide whether we want to shard m x n contraction by columns or by rows. - static bool shardByCol(Index m, Index n, Index num_threads) { - // Note: we are comparing both n and m against Traits::nr, it is not - // a mistake. We are trying to figure out how both n and m will fit into - // the main sharding dimension. - - // Sharding by column is the default - // ... unless there is enough data for vectorization over rows - if (m / num_threads >= Traits::nr && - // and not enough data for vectorization over columns - (n / num_threads < Traits::nr || - // ... or barely enough data for vectorization over columns, - // but it is not evenly dividable across threads - (n / num_threads < 4 * Traits::nr && - (n % (num_threads * Traits::nr)) != 0 && - // ... and it is evenly dividable across threads for rows - ((m % (num_threads * Traits::nr)) == 0 || - // .. or it is not evenly dividable for both dimensions but - // there is much more data over rows so that corner effects are - // mitigated. - (m / n >= 6))))) - return false; - // Wait, or if matrices are just substantially prolonged over the other - // dimension. 
- if (n / num_threads < 16 * Traits::nr && m > n * 32) return false; - return true; - } - - Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn, - int num_threads, bool shard_by_col) const { - Index gm = 1; - Index gm1 = 1; - Index nm0 = divup(m, bm); - Index nm1 = nm0; - for (;;) { - // Find the next candidate for m grain size. It needs to result in - // different number of blocks. E.g. if we have 10 kernels, we want to try - // 5 and 10, but not 6, 7, 8 and 9. - while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++; - if (gm1 > nm0) break; - // Check the candidate. - int res = checkGrain(m, n, bm, bn, bk, gm1, gn, gm, gn, num_threads, - shard_by_col); - if (res < 0) break; - nm1 = divup(nm0, gm1); - if (res == 0) continue; - // Commit new grain size. - gm = gm1; - } - return gm; - } - - Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm, - int num_threads, bool shard_by_col) const { - Index gn = 1; - Index gn1 = 1; - Index nn0 = divup(n, bn); - Index nn1 = nn0; - for (;;) { - while (gn1 <= nn0 && nn1 == divup(nn0, gn1)) gn1++; - if (gn1 > nn0) break; - int res = checkGrain(m, n, bm, bn, bk, gm, gn1, gm, gn, num_threads, - shard_by_col); - if (res < 0) break; - nn1 = divup(nn0, gn1); - if (res == 0) continue; - gn = gn1; - } - return gn; - } - - // checkGrain checks whether grain (gm, gn) is suitable and is better than - // (oldgm, oldgn). - int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm, - Index gn, Index oldgm, Index oldgn, int num_threads, - bool shard_by_col) const { - const TensorOpCost cost = - contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true); - double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize( - static_cast<double>(bm) * gm * bn * gn, cost); - // If the task is too small, then we agree on it regardless of anything - // else. Otherwise synchronization overheads will dominate. - if (taskSize < 1) return 1; - // If it is too large, then we reject it and all larger tasks. - if (taskSize > 2) return -1; - // Now we are in presumably good task size range. - // The main deciding factor here is parallelism. Consider that we have 12 - // kernels and 4 threads. Grains of 2, 3 and 4 all yield good task sizes. - // But 2/4 yield 6/3 tasks, which gives us parallelism of 0.75 (at most 3/4 - // of cores will be busy). While grain size 3 gives us 4 tasks, which gives - // us parallelism of 1 (we can load all cores). 
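The code that follows computes this parallelism metric for the candidate and current grain sizes. The standalone sketch below reproduces the 12-kernel, 4-thread example from the comment, showing how grain size 3 reaches full parallelism while grains of 2 and 4 leave a quarter of the cores idle.

```cpp
#include <cstdio>

static int divup(int x, int y) { return (x + y - 1) / y; }

int main() {
  const int kernels = 12, threads = 4;
  for (int grain : {2, 3, 4}) {
    const int tasks = divup(kernels, grain);
    const double parallelism =
        static_cast<double>(tasks) / (divup(tasks, threads) * threads);
    std::printf("grain %d -> %d tasks, parallelism %.2f\n",
                grain, tasks, parallelism);
  }
  return 0;
}
```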
- Index nm0 = divup(m, bm); - Index nn0 = divup(n, bn); - Index new_tasks = divup(nm0, gm) * divup(nn0, gn); - double new_parallelism = static_cast<double>(new_tasks) / - (divup<int>(new_tasks, num_threads) * num_threads); - Index old_tasks = divup(nm0, oldgm) * divup(nn0, oldgn); - double old_parallelism = static_cast<double>(old_tasks) / - (divup<int>(old_tasks, num_threads) * num_threads); - if (new_parallelism > old_parallelism || new_parallelism == 1) return 1; - return 0; - } - -#else // EIGEN_USE_SIMPLE_THREAD_POOL - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalProduct(Scalar* buffer) const { - if (this->m_j_size == 1) { - this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - return; - } - - evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); - } - - template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> - void evalGemm(Scalar* buffer) const { - // columns in left side, rows in right side - const Index k = this->m_k_size; - - // rows in left side - const Index m = this->m_i_size; - - // columns in right side - const Index n = this->m_j_size; - - // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) - this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); - - - const int lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size; - const int rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size; - - typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs, - LeftEvaluator, left_nocontract_t, - contract_t, lhs_packet_size, - lhs_inner_dim_contiguous, - false, Unaligned> LhsMapper; - - typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs, - RightEvaluator, right_nocontract_t, - contract_t, rhs_packet_size, - rhs_inner_dim_contiguous, - rhs_inner_dim_reordered, Unaligned> RhsMapper; - - typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper; - - // TODO: packing could be faster sometimes if we supported row major tensor mappers - typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr, - Traits::LhsProgress, ColMajor> LhsPacker; - typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker; - - // TODO: replace false, false with conjugate values? 
- typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, - Traits::mr, Traits::nr, false, false> GebpKernel; - - typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg; - typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg; - - // initialize data mappers - LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, - this->m_left_contracting_strides, this->m_k_strides); - - RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, - this->m_right_contracting_strides, this->m_k_strides); - - OutputMapper output(buffer, m); - - // compute block sizes (which depend on number of threads) - const Index num_threads = this->m_device.numThreads(); - internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, num_threads); - Index mc = blocking.mc(); - Index nc = blocking.nc(); - Index kc = blocking.kc(); - eigen_assert(mc <= m); - eigen_assert(nc <= n); - eigen_assert(kc <= k); - -#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) - const Index k_blocks = CEIL_DIV(k, kc); - const Index n_blocks = CEIL_DIV(n, nc); - const Index m_blocks = CEIL_DIV(m, mc); - const Index sizeA = mc * kc; - const Index sizeB = kc * nc; - - /* cout << "m: " << m << " n: " << n << " k: " << k << endl; - cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl; - cout << "m_blocks: " << m_blocks << " n_blocks: " << n_blocks << " k_blocks: " << k_blocks << endl; - cout << "num threads: " << num_threads << endl; - */ - - // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB - // aren't 16 byte aligned segfaults will happen due to SIMD instructions - // note: You can get away with allocating just a single blockA and offsets and meet the - // the alignment requirements with the assumption that - // (Traits::mr * sizeof(ResScalar)) % 16 == 0 - const Index numBlockAs = numext::mini(num_threads, m_blocks); - MaxSizeVector<LhsScalar *> blockAs(num_threads); - for (int i = 0; i < num_threads; i++) { - blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); - } - - // To circumvent alignment issues, I'm just going to separately allocate the memory for each thread - // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful. - // Other options: (1) reuse memory when a thread finishes. con: tricky - // (2) allocate block B memory in each thread. 
con: overhead - MaxSizeVector<RhsScalar *> blockBs(n_blocks); - for (int i = 0; i < n_blocks; i++) { - blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); - } - - // lhs_notifications starts with all null Notifications - MaxSizeVector<Notification*> lhs_notifications(num_threads, nullptr); - - // this should really be numBlockAs * n_blocks; - const Index num_kernel_notifications = num_threads * n_blocks; - MaxSizeVector<Notification*> kernel_notifications(num_kernel_notifications, - nullptr); - - for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { - const Index k_start = k_block_idx * kc; - // make sure we don't overshoot right edge of left matrix - const Index actual_kc = numext::mini(k_start + kc, k) - k_start; - - for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) { - const Index num_blocks = numext::mini(m_blocks-m_block_idx, numBlockAs); - - for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) { - const Index m_start = mt_block_idx * mc; - const Index actual_mc = numext::mini(m_start + mc, m) - m_start; - eigen_assert(actual_mc > 0); - - Index blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; - - for (int i = 0; i < n_blocks; ++i) { - Index notification_id = (blockAId * n_blocks + i); - // Wait for any current kernels using this slot to complete - // before using it. - if (kernel_notifications[notification_id]) { - wait_until_ready(kernel_notifications[notification_id]); - delete kernel_notifications[notification_id]; - } - kernel_notifications[notification_id] = new Notification(); - } - const packLArg arg = { - blockAs[blockAId], // blockA - lhs, // lhs - m_start, // m - k_start, // k - actual_mc, // mc - actual_kc, // kc - }; - - // Delete any existing notification since we may be - // replacing it. The algorithm should ensure that there are - // no existing waiters on this notification. - delete lhs_notifications[blockAId]; - lhs_notifications[blockAId] = - this->m_device.enqueue(&Self::packLhs<packLArg, LhsPacker>, arg); - } - - // now start kernels. - const Index m_base_start = m_block_idx * mc; - const bool need_to_pack = m_block_idx == 0; - - for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) { - const Index n_start = n_block_idx * nc; - const Index actual_nc = numext::mini(n_start + nc, n) - n_start; - - // first make sure the previous kernels are all done before overwriting rhs. Also wait if - // we're going to start new k. In both cases need_to_pack is true. - if (need_to_pack) { - for (Index i = num_blocks; i < num_threads; ++i) { - Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; - Index future_id = (blockAId * n_blocks + n_block_idx); - wait_until_ready(kernel_notifications[future_id]); - } - } - - packRKArg arg = { - &blockAs, // blockA - blockBs[n_block_idx], // blockB - rhs, // rhs - output, // output - m_base_start, // m - k_start, // k - n_start, // n - mc, // mc - actual_kc, // kc - actual_nc, // nc - num_threads, - numBlockAs, - m, - k_block_idx, - m_block_idx, - n_block_idx, // n_block_idx - m_blocks, // m_blocks - n_blocks, // n_blocks - &kernel_notifications, // kernel notifications - &lhs_notifications, // lhs notifications - need_to_pack, // need_to_pack - }; - - // We asynchronously kick off this function, which ends up - // notifying the appropriate kernel_notifications objects, - // which this thread waits on before exiting. 
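The dependency pattern used here, where packing tasks signal notifications that kernel tasks wait on before consuming a packed block, can be sketched with standard C++ primitives. The snippet below is a hypothetical illustration using std::promise/std::future and std::thread; it is not Eigen's Notification or ThreadPoolDevice API.

```cpp
#include <cstdio>
#include <future>
#include <thread>
#include <vector>

int main() {
  std::vector<float> blockA(16);
  std::promise<void> packed;
  std::shared_future<void> packed_ready = packed.get_future().share();

  // "Packing" task: fill the block, then notify waiters.
  std::thread pack([&] {
    for (std::size_t i = 0; i < blockA.size(); ++i) blockA[i] = float(i);
    packed.set_value();
  });

  // "Kernel" tasks: wait for the packing notification, then consume the block.
  std::vector<std::thread> kernels;
  for (int t = 0; t < 2; ++t) {
    kernels.emplace_back([&, t] {
      packed_ready.wait();
      float sum = 0;
      for (float v : blockA) sum += v;
      std::printf("kernel %d consumed packed block, sum=%.0f\n", t, sum);
    });
  }

  pack.join();
  for (auto& th : kernels) th.join();
  return 0;
}
```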
- this->m_device.enqueueNoNotification(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg); - } - } - } - - // Make sure all the kernels are done. - for (size_t i = 0; i < kernel_notifications.size(); ++i) { - wait_until_ready(kernel_notifications[i]); - delete kernel_notifications[i]; - } - - // No need to wait for lhs notifications since they should have - // already been waited on. Just clean them up. - for (size_t i = 0; i < lhs_notifications.size(); ++i) { - delete lhs_notifications[i]; - } - - // deallocate all of the memory for both A and B's - for (size_t i = 0; i < blockAs.size(); i++) { - this->m_device.deallocate(blockAs[i]); - } - for (size_t i = 0; i < blockBs.size(); i++) { - this->m_device.deallocate(blockBs[i]); - } - -#undef CEIL_DIV - } - - /* - * Packs a LHS block of size (mt, kc) starting at lhs(m, k). Before packing - * the LHS block, check that all of the kernels that worked on the same - * mt_block_idx in the previous m_block are done. - */ - template <typename packLArg, typename LhsPacker> - static void packLhs(const packLArg arg) { - // perform actual packing - LhsPacker pack_lhs; - pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc); - } - - /* - * Packs a RHS block of size (kc, nc) starting at (k, n) after checking that - * all kernels in the previous block are done. - * Then for each LHS future, we wait on the future and then call GEBP - * on the area packed by the future (which starts at - * blockA + future_idx * mt * kc) on the LHS and with the full packed - * RHS block. - * The output of this GEBP is written to output(m + i * mt, n). - */ - template <typename packRKArg, typename RhsPacker, typename GebpKernel> - static void packRhsAndKernel(packRKArg arg) { - if (arg.need_to_pack) { - RhsPacker pack_rhs; - pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc); - } - - GebpKernel gebp; - for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) { - const Index m_base_start = arg.m + arg.mc*mt_block_idx; - if (m_base_start < arg.max_m) { - Index blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; - wait_until_ready((*arg.lhs_notifications)[blockAId]); - const Index actual_mc = numext::mini(m_base_start + arg.mc, arg.max_m) - m_base_start; - gebp(arg.output.getSubMapper(m_base_start, arg.n), - (*arg.blockAs)[blockAId], arg.blockB, - actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0); - - // Notify that the kernel is done. - const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx; - (*arg.kernel_notifications)[set_idx]->Notify(); - } - } - } -#endif // EIGEN_USE_SIMPLE_THREAD_POOL - - TensorOpCost contractionCost(Index m, Index n, Index bm, Index bn, Index bk, - bool shard_by_col, bool prepacked) const { - const int packed_size = std::min<int>(PacketType<LhsScalar, Device>::size, - PacketType<RhsScalar, Device>::size); - const int output_packet_size = internal::unpacket_traits<PacketReturnType>::size; - const double kd = static_cast<double>(bk); - // Peak VFMA bandwidth is 0.5. However if we have not enough data for - // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined - // experimentally. - double computeBandwidth = bk == 1 ? 4.0 : - (shard_by_col ? bn : bm) < Traits::nr || - (shard_by_col ? bm : bn) < Traits::mr ? 2.0 : 0.5; -#ifndef EIGEN_VECTORIZE_FMA - // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors. 
- // However for MULPS/ADDPS we have dependent sequence of 2 such instructions, - // so overall bandwidth is 1.0. - if (computeBandwidth == 0.5) computeBandwidth = 1.0; -#endif - // Computations. - TensorOpCost cost = TensorOpCost(0, 0, kd * computeBandwidth, true, packed_size); - // Output stores. - cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); - if (prepacked) { - // Packing and kernels are executed in different tasks. When we calculate - // task grain size we look only at kernel cost assuming that kernel - // is more expensive than packing. - return cost; - } - // Lhs/rhs loads + computations. - TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n); - TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m); - // Lhs packing memory cost does not contribute considerably to overall - // execution time because lhs is prefetched early and accessed sequentially. - if (shard_by_col) - lhsCost.dropMemoryCost(); - else - rhsCost.dropMemoryCost(); - return cost + lhsCost + rhsCost; - } -}; - -} // end namespace Eigen - -#endif // EIGEN_USE_THREADS -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h deleted file mode 100644 index 860a694..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ /dev/null @@ -1,279 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H - -namespace Eigen { - -/** \class TensorConversionOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor conversion class. This class makes it possible to vectorize - * type casting operations when the number of scalars per packet in the source - * and the destination type differ - */ -namespace internal { -template<typename TargetType, typename XprType> -struct traits<TensorConversionOp<TargetType, XprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. 
- typedef TargetType Scalar; - typedef typename traits<XprType>::StorageKind StorageKind; - typedef typename traits<XprType>::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = traits<XprType>::Layout; - enum { Flags = 0 }; -}; - -template<typename TargetType, typename XprType> -struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> -{ - typedef const TensorConversionOp<TargetType, XprType>& type; -}; - -template<typename TargetType, typename XprType> -struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type> -{ - typedef TensorConversionOp<TargetType, XprType> type; -}; - -} // end namespace internal - - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> -struct PacketConverter { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); - } - - private: - const TensorEvaluator& m_impl; -}; - - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - - SrcPacket src1 = m_impl.template packet<LoadMode>(index); - SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); - TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2); - return result; - } - - private: - const TensorEvaluator& m_impl; -}; - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - - SrcPacket src1 = m_impl.template packet<LoadMode>(index); - SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); - SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); - SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); - TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4); - return result; - } - - private: - const TensorEvaluator& m_impl; -}; - -template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketConverter(const TensorEvaluator& impl) - : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} - - template<int LoadMode, typename Index> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { - const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; - // Only call m_impl.packet() when we have direct access to the 
underlying data. This - // ensures that we don't compute the subexpression twice. We may however load some - // coefficients twice, but in practice this doesn't negatively impact performance. - if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) { - // Force unaligned memory loads since we can't ensure alignment anymore - return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index)); - } else { - const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size; - typedef typename internal::unpacket_traits<SrcPacket>::type SrcType; - typedef typename internal::unpacket_traits<TgtPacket>::type TgtType; - internal::scalar_cast_op<SrcType, TgtType> converter; - EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize]; - for (int i = 0; i < TgtPacketSize; ++i) { - values[i] = converter(m_impl.coeff(index+i)); - } - TgtPacket rslt = internal::pload<TgtPacket>(values); - return rslt; - } - } - - private: - const TensorEvaluator& m_impl; - const typename TensorEvaluator::Index m_maxIndex; -}; - -template<typename TargetType, typename XprType> -class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorConversionOp>::Scalar Scalar; - typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorConversionOp>::Index Index; - typedef typename internal::nested<TensorConversionOp>::type Nested; - typedef Scalar CoeffReturnType; - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) - : m_xpr(xpr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; -}; - -template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { - impl.evalSubExprsIfNeeded(NULL); - return true; - } -}; - -template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { - return impl.evalSubExprsIfNeeded(data); - } -}; - - -// Eval as rvalue -template<typename TargetType, typename ArgType, typename Device> -struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> -{ - typedef TensorConversionOp<TargetType, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - typedef TargetType Scalar; - typedef TargetType CoeffReturnType; - typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename PacketType<SrcType, Device>::type PacketSourceType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = true, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool 
evalSubExprsIfNeeded(Scalar* data) - { - return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() - { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - internal::scalar_cast_op<SrcType, TargetType> converter; - return converter(m_impl.coeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess & - internal::type_casting_traits<SrcType, TargetType>::VectorizedCast; - return PacketConv<LoadMode, Vectorizable>::run(m_impl, index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>(); - if (vectorized) { - const double SrcCoeffRatio = - internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; - const double TgtCoeffRatio = - internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; - return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) + - TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize)); - } else { - return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost); - } - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - template <int LoadMode, bool ActuallyVectorize> - struct PacketConv { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { - internal::scalar_cast_op<SrcType, TargetType> converter; - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = converter(impl.coeff(index+i)); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - }; - - template <int LoadMode> - struct PacketConv<LoadMode, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { - const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; - const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; - PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, - SrcCoeffRatio, TgtCoeffRatio> converter(impl); - return converter.template packet<LoadMode>(index); - } - }; - - TensorEvaluator<ArgType, Device> m_impl; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h deleted file mode 100644 index abdf742..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ /dev/null @@ -1,1104 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
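[Editor's aside — not part of the deleted file. The TensorConversion.h code removed above implements the evaluator behind tensor type casting; a minimal usage sketch is given below for reference. It assumes the usual public entry point, TensorBase::cast<T>(), and the standard unsupported Tensor header; the tensor names and sizes are made up for illustration.]

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // .cast<T>() builds a TensorConversionOp, so this float -> double
  // conversion is evaluated coefficient-by-coefficient (and vectorized
  // when the source/target packet ratios allow it).
  Eigen::Tensor<float, 2> input(3, 4);
  input.setRandom();
  Eigen::Tensor<double, 2> output = input.cast<double>();
  return output.size() == input.size() ? 0 : 1;
}

[End of aside; the deleted TensorConvolution.h content continues below.]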
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H - -namespace Eigen { - -/** \class TensorConvolution - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor convolution class. - * - * - */ -namespace internal { - -template <typename Index, typename InputDims, int NumKernelDims, int Layout> -class IndexMapper { - public: - IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims, - const array<Index, NumKernelDims>& indices) { - - array<Index, NumDims> dimensions = input_dims; - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = indices[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - dimensions[index] = result_dim; - } - - array<Index, NumDims> inputStrides; - array<Index, NumDims> outputStrides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputStrides[0] = 1; - outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; - outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; - } - } else { - inputStrides[NumDims - 1] = 1; - outputStrides[NumDims - 1] = 1; - for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) { - inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; - outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; - } - } - - array<Index, NumDims> cudaInputDimensions; - array<Index, NumDims> cudaOutputDimensions; - array<Index, NumDims> tmp = dimensions; - array<Index, NumDims> ordering; - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = i + offset; - ordering[index] = indices[i]; - tmp[indices[i]] = -1; - cudaInputDimensions[index] = input_dims[indices[i]]; - cudaOutputDimensions[index] = dimensions[indices[i]]; - } - - int written = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
NumKernelDims - : 0; - for (int i = 0; i < NumDims; ++i) { - if (tmp[i] >= 0) { - ordering[written] = i; - cudaInputDimensions[written] = input_dims[i]; - cudaOutputDimensions[written] = dimensions[i]; - ++written; - } - } - - for (int i = 0; i < NumDims; ++i) { - m_inputStrides[i] = inputStrides[ordering[i]]; - m_outputStrides[i] = outputStrides[ordering[i]]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - if (i > NumKernelDims) { - m_cudaInputStrides[i] = - m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1]; - m_cudaOutputStrides[i] = - m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1]; - } else { - m_cudaInputStrides[i] = 1; - m_cudaOutputStrides[i] = 1; - } - } - } else { - for (int i = NumDims - 1; i >= 0; --i) { - if (i + 1 < offset) { - m_cudaInputStrides[i] = - m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1]; - m_cudaOutputStrides[i] = - m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1]; - } else { - m_cudaInputStrides[i] = 1; - m_cudaOutputStrides[i] = 1; - } - } - } - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaInputStrides[d]; - inputIndex += idx * m_inputStrides[d]; - p -= idx * m_cudaInputStrides[d]; - } - inputIndex += p * m_inputStrides[NumKernelDims]; - } else { - std::ptrdiff_t limit = 0; - if (NumKernelDims < NumDims) { - limit = NumDims - NumKernelDims - 1; - } - for (int d = 0; d < limit; ++d) { - const Index idx = p / m_cudaInputStrides[d]; - inputIndex += idx * m_inputStrides[d]; - p -= idx * m_cudaInputStrides[d]; - } - inputIndex += p * m_inputStrides[limit]; - } - return inputIndex; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const { - Index outputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int d = NumDims - 1; d > NumKernelDims; --d) { - const Index idx = p / m_cudaOutputStrides[d]; - outputIndex += idx * m_outputStrides[d]; - p -= idx * m_cudaOutputStrides[d]; - } - outputIndex += p * m_outputStrides[NumKernelDims]; - } else { - std::ptrdiff_t limit = 0; - if (NumKernelDims < NumDims) { - limit = NumDims - NumKernelDims - 1; - } - for (int d = 0; d < limit; ++d) { - const Index idx = p / m_cudaOutputStrides[d]; - outputIndex += idx * m_outputStrides[d]; - p -= idx * m_cudaOutputStrides[d]; - } - outputIndex += p * m_outputStrides[limit]; - } - return outputIndex; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + - k * m_inputStrides[offset + 2]; - } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { - const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 0 - : NumDims - NumKernelDims; - return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + - k * m_outputStrides[offset + 2]; - } - - private: - static const int NumDims = internal::array_size<InputDims>::value; - array<Index, NumDims> m_inputStrides; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_cudaInputStrides; - array<Index, NumDims> m_cudaOutputStrides; -}; - - - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename promote_storage_type<typename InputXprType::Scalar, - typename KernelXprType::Scalar>::ret Scalar; - typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind, - typename traits<KernelXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<InputXprType>::Index, - typename traits<KernelXprType>::Index>::type Index; - typedef typename InputXprType::Nested LhsNested; - typedef typename KernelXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<InputXprType>::NumDimensions; - static const int Layout = traits<InputXprType>::Layout; - - enum { - Flags = 0 - }; -}; - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense> -{ - typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type; -}; - -template<typename Dimensions, typename InputXprType, typename KernelXprType> -struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type> -{ - typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type; -}; - -} // end namespace internal - - - -template<typename Indices, typename InputXprType, typename KernelXprType> -class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType, - typename KernelXprType::CoeffReturnType>::ret CoeffReturnType; - typedef 
typename Eigen::internal::nested<TensorConvolutionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) - : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Indices& indices() const { return m_indices; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all<typename InputXprType::Nested>::type& - inputExpression() const { return m_input_xpr; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all<typename KernelXprType::Nested>::type& - kernelExpression() const { return m_kernel_xpr; } - - protected: - typename InputXprType::Nested m_input_xpr; - typename KernelXprType::Nested m_kernel_xpr; - const Indices m_indices; -}; - - -template<typename Indices, typename InputArgType, typename KernelArgType, typename Device> -struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device> -{ - typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType; - - static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value; - static const int NumKernelDims = internal::array_size<Indices>::value; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess, - Layout = TensorEvaluator<InputArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - - const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; - } - } else { - m_inputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; - } - } - - m_dimensions = m_inputImpl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index 
kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i > 0) { - m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; - } else { - m_kernelStride[0] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; - } - } else { - for (int i = NumKernelDims - 1; i >= 0; --i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i < NumKernelDims - 1) { - m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; - } else { - m_kernelStride[NumKernelDims - 1] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_inputImpl.evalSubExprsIfNeeded(NULL); - preloadKernel(); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_inputImpl.cleanup(); - if (m_local_kernel) { - m_device.deallocate((void*)m_kernel); - m_local_kernel = false; - } - m_kernel = NULL; - } - - void evalTo(typename XprType::Scalar* buffer) { - evalSubExprsIfNeeded(NULL); - for (int i = 0; i < dimensions().TotalSize(); ++i) { - buffer[i] += coeff(i); - } - cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - CoeffReturnType result = CoeffReturnType(0); - convolve(firstInput(index), 0, NumKernelDims-1, result); - return result; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const - { - Index indices[2] = {index, index+PacketSize-1}; - Index startInputs[2] = {0, 0}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_outputStride[i]; - const Index idx1 = indices[1] / m_outputStride[i]; - startInputs[0] += idx0 * m_inputStride[i]; - startInputs[1] += idx1 * m_inputStride[i]; - indices[0] -= idx0 * m_outputStride[i]; - indices[1] -= idx1 * m_outputStride[i]; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_outputStride[i]; - const Index idx1 = indices[1] / m_outputStride[i]; - startInputs[0] += idx0 * m_inputStride[i]; - startInputs[1] += idx1 * m_inputStride[i]; - indices[0] -= idx0 * m_outputStride[i]; - indices[1] -= idx1 * m_outputStride[i]; - } - } - startInputs[0] += indices[0]; - startInputs[1] += indices[1]; - - if (startInputs[1]-startInputs[0] == PacketSize-1) { - PacketReturnType result = internal::pset1<PacketReturnType>(0); - convolvePacket(startInputs[0], 0, NumKernelDims-1, result); - return result; - } else { - EIGEN_ALIGN_MAX Scalar data[PacketSize]; - data[0] = Scalar(0); - convolve(startInputs[0], 0, NumKernelDims-1, data[0]); - for (int i = 1; i < PacketSize-1; ++i) { - data[i] = Scalar(0); - convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]); - } - data[PacketSize-1] = Scalar(0); - convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]); - return internal::pload<PacketReturnType>(data); - } - } - - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double kernel_size = m_kernelImpl.dimensions().TotalSize(); - // We ignore the use of fused multiply-add. - const double convolve_compute_cost = - TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>(); - const double firstIndex_compute_cost = - NumDims * - (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()); - return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + - kernel_size * (m_inputImpl.costPerCoeff(vectorized) + - m_kernelImpl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, convolve_compute_cost, vectorized, - PacketSize)); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { - Index startInput = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStride[i]; - startInput += idx * m_inputStride[i]; - index -= idx * m_outputStride[i]; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStride[i]; - startInput += idx * m_inputStride[i]; - index -= idx * m_outputStride[i]; - } - } - startInput += index; - return startInput; - } - - EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const { - for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { - const Index input = firstIndex + j * m_indexStride[DimIndex]; - const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; - if (DimIndex > 0) { - convolve(input, kernel, DimIndex-1, accum); - } else { - accum += m_inputImpl.coeff(input) * m_kernel[kernel]; - } - } - } - - template <typename Packet> - EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const { - for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { - const Index input = firstIndex + j * m_indexStride[DimIndex]; - const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; - if (DimIndex > 0) { - convolvePacket(input, kernel, DimIndex-1, accum); - } else { - accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { - // Don't make a local copy of the kernel unless we have to (i.e. 
it's an - // expression that needs to be evaluated) - const Scalar* in_place = m_kernelImpl.data(); - if (in_place) { - m_kernel = in_place; - m_local_kernel = false; - } else { - size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); - Scalar* local = (Scalar*)m_device.allocate(kernel_sz); - typedef TensorEvalToOp<const KernelArgType> EvalTo; - EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value; - internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); - - m_kernel = local; - m_local_kernel = true; - } - } - - array<Index, NumDims> m_inputStride; - array<Index, NumDims> m_outputStride; - - array<Index, NumKernelDims> m_indexStride; - array<Index, NumKernelDims> m_kernelStride; - TensorEvaluator<InputArgType, Device> m_inputImpl; - TensorEvaluator<KernelArgType, Device> m_kernelImpl; - Dimensions m_dimensions; - - KernelArgType m_kernelArg; - const Scalar* m_kernel; - bool m_local_kernel; - const Device& m_device; -}; - - - - -// Use an optimized implementation of the evaluation code for GPUs whenever possible. -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - -template <int StaticKernelSize> -struct GetKernelSize { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const { - return StaticKernelSize; - } -}; -template <> -struct GetKernelSize<Dynamic> { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const { - return kernelSize; - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims, - int StaticKernelSize> -__global__ void EigenConvolutionKernel1D( - InputEvaluator eval, - const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const int numPlanes, const int numX, - const int maxX, const int kernelSize, float* buffer) { - extern __shared__ float s[]; - - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? 
first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize); - const int num_x_output = last_x - first_x + 1; - - const int first_plane = blockIdx.y * blockDim.y; - const int plane_stride = blockDim.y * gridDim.y; - - for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { - // Load inputs to shared memory - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = threadIdx.y * num_x_input; - #pragma unroll - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x); - s[i + plane_kernel_offset] = eval.coeff(tensor_index); - } - - __syncthreads(); - - // Compute the convolution - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - #pragma unroll - for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { - const int kernel_offset = plane_kernel_offset + i; - float result = 0.0f; - #pragma unroll - for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) { - result += s[k + kernel_offset] * kernel[k]; - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x); - buffer[tensor_index] = result; - } - __syncthreads(); - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims, - int StaticKernelSizeX, int StaticKernelSizeY> -__global__ void EigenConvolutionKernel2D( - InputEvaluator eval, - const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const int numPlanes, const int numX, - const int maxX, const int numY, const int maxY, const int kernelSizeX, - const int kernelSizeY, float* buffer) { - extern __shared__ float s[]; - - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX); - const int num_x_output = last_x - first_x + 1; - - const int first_y = blockIdx.y * maxY; - const int last_y = (first_y + maxY < numY ? 
first_y + maxY : numY) - 1; - const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY); - const int num_y_output = last_y - first_y + 1; - - const int first_plane = blockIdx.z * blockDim.z; - const int plane_stride = blockDim.z * gridDim.z; - - for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { - - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = threadIdx.z * num_y_input; - - // Load inputs to shared memory - #pragma unroll - for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { - const int input_offset = num_x_input * (j + plane_kernel_offset); - #pragma unroll - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y); - s[i + input_offset] = eval.coeff(tensor_index); - } - } - - __syncthreads(); - - // Convolution - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - #pragma unroll - for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { - #pragma unroll - for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { - float result = 0.0f; - #pragma unroll - for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) { - const int kernel_offset = kernelSizeX * l; - const int input_offset = i + num_x_input * (j + l + plane_kernel_offset); - #pragma unroll - for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) { - result += s[k + input_offset] * kernel[k + kernel_offset]; - } - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y); - buffer[tensor_index] = result; - } - } - - __syncthreads(); - } -}; - -template <typename InputEvaluator, typename Index, typename InputDims> -__global__ void EigenConvolutionKernel3D( - InputEvaluator eval, - const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout> - indexMapper, - const float* __restrict kernel, const size_t numPlanes, const size_t numX, - const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, - const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, - const size_t kernelSizeZ, float* buffer) { - extern __shared__ float s[]; - - // Load inputs to shared memory - const int first_x = blockIdx.x * maxX; - const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; - const int num_x_input = last_x - first_x + kernelSizeX; - - const int first_y = blockIdx.y * maxY; - const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; - const int num_y_input = last_y - first_y + kernelSizeY; - - const int first_z = blockIdx.z * maxZ; - const int last_z = (first_z + maxZ < numZ ? 
first_z + maxZ : numZ) - 1; - const int num_z_input = last_z - first_z + kernelSizeZ; - - for (int p = 0; p < numPlanes; ++p) { - - const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); - const int plane_kernel_offset = 0; - - for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) { - for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { - for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { - const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z); - s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index); - } - } - } - - __syncthreads(); - - // Convolution - const int num_z_output = last_z - first_z + 1; - const int num_y_output = last_y - first_y + 1; - const int num_x_output = last_x - first_x + 1; - const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); - - for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) { - for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { - for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { - float result = 0.0f; - for (int n = 0; n < kernelSizeZ; ++n) { - for (int m = 0; m < kernelSizeY; ++m) { - for (int l = 0; l < kernelSizeX; ++l) { - result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)]; - } - } - } - const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z); - buffer[tensor_index] = result; - } - } - } - __syncthreads(); - } -}; - - - -template<typename Indices, typename InputArgType, typename KernelArgType> -struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice> -{ - typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType; - - static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value; - static const int NumKernelDims = internal::array_size<Indices>::value; - typedef typename XprType::Index Index; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions; - - enum { - IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned, - PacketAccess = false, - Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device) - : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - - const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - - m_dimensions = m_inputImpl.dimensions(); - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index 
result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - } - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType; - typedef typename InputArgType::Scalar Scalar; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - preloadKernel(); - m_inputImpl.evalSubExprsIfNeeded(NULL); - if (data) { - executeEval(data); - return false; - } else { - m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)); - executeEval(m_buf); - return true; - } - } - - EIGEN_STRONG_INLINE void cleanup() { - m_inputImpl.cleanup(); - if (m_buf) { - m_device.deallocate(m_buf); - m_buf = NULL; - } - if (m_local_kernel) { - m_device.deallocate((void*)m_kernel); - m_local_kernel = false; - } - m_kernel = NULL; - } - - EIGEN_STRONG_INLINE void preloadKernel() { - // Don't make a local copy of the kernel unless we have to (i.e. it's an - // expression that needs to be evaluated) - const Scalar* in_place = m_kernelImpl.data(); - if (in_place) { - m_kernel = in_place; - m_local_kernel = false; - } else { - size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); - Scalar* local = (Scalar*)m_device.allocate(kernel_sz); - typedef TensorEvalToOp<const KernelArgType> EvalTo; - EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value; - internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device); - - m_kernel = local; - m_local_kernel = true; - } - } - - static unsigned int ceil(unsigned int num, unsigned int denom) { - const unsigned int rounded_toward_zero = num / denom; - if (num > rounded_toward_zero * denom) { - return rounded_toward_zero + 1; - } - return rounded_toward_zero; - } - - void executeEval(Scalar* data) const { - typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims; - - const int maxSharedMem = m_device.sharedMemPerBlock(); - const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock(); - const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock; - const int numMultiProcessors = m_device.getNumCudaMultiProcessors(); - const int warpSize = 32; - - switch (NumKernelDims) { - case 1: { - const int kernel_size = m_kernelImpl.dimensions().TotalSize(); - - const int numX = dimensions()[m_indices[0]]; - const int numP = dimensions().TotalSize() / numX; - int maxX; - dim3 block_size; - - const int single_stride_dim = - static_cast<int>(Layout) == static_cast<int>(ColMajor) - ? 
0 - : m_inputImpl.dimensions().rank() - 1; - if (m_indices[0] == single_stride_dim) { - // Maximum the reuse - const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; - maxX = numext::mini<int>(inner_dim, numX); - const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); - block_size.x = numext::mini(maxThreadsPerBlock, maxX); - block_size.y = numext::mini<int>(maxThreadsPerBlock / block_size.x, maxP); - } - else { - // Read as much as possible alongside the inner most dimension, that is the plane - const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); - const int maxP = numext::mini<int>(inner_dim, numP); - maxX = numext::mini<int>(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); - - block_size.x = numext::mini(warpSize, maxX); - block_size.y = numext::mini<int>(maxThreadsPerBlock/block_size.x, maxP); - } - - const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - const int num_x_blocks = ceil(numX, maxX); - const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); - const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); - - dim3 num_blocks(num_x_blocks, numext::mini<int>(num_y_blocks, ceil(numP, block_size.y))); - - - //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - - const array<Index, 1> indices(m_indices[0]); - const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]); - internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - switch(kernel_size) { - case 4: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data); - break; - } - case 7: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data); - } - } - break; - } - - case 2: { - const int idxX = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1; - const int idxY = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
1 : 0; - const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; - const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; - - const int numX = dimensions()[m_indices[idxX]]; - const int numY = dimensions()[m_indices[idxY]]; - const int numP = dimensions().TotalSize() / (numX*numY); - - const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); - - // Snap maxX to warp size - int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; - const int maxX = numext::mini<int>(inner_dim, numX); - const int maxY = numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); - const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); - - dim3 block_size; - block_size.x = numext::mini(1024, maxX); - block_size.y = numext::mini<int>(1024/block_size.x, maxY); - block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP); - - const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - const int num_x_blocks = ceil(numX, maxX); - const int num_y_blocks = ceil(numY, maxY); - const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); - const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); - - dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini<int>(num_z_blocks, ceil(numP, block_size.z))); - - - //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - - const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]); - const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX], - m_kernelImpl.dimensions()[idxY]); - internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - switch (kernel_size_x) { - case 4: { - switch (kernel_size_y) { - case 7: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data); - break; - } - } - break; - } - case 7: { - switch (kernel_size_y) { - case 4: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data); - break; - } - default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data); - break; - } - } - break; - } - 
default: { - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data); - break; - } - } - break; - } - - case 3: { - const int idxX = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2; - const int idxY = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1; - const int idxZ = - static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0; - - const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; - const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; - const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; - - const int numX = dimensions()[m_indices[idxX]]; - const int numY = dimensions()[m_indices[idxY]]; - const int numZ = dimensions()[m_indices[idxZ]]; - const int numP = dimensions().TotalSize() / (numX*numY*numZ); - - const int maxX = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); - const int maxY = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); - const int maxZ = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); - - dim3 block_size; - block_size.x = numext::mini(32, maxX); - block_size.y = numext::mini(32, maxY); - block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxZ); - dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); - - const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); - assert(shared_mem <= maxSharedMem); - - //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; - const array<Index, 3> indices(m_indices[idxX], m_indices[idxY], - m_indices[idxZ]); - const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX], - m_kernelImpl.dimensions()[idxY], - m_kernelImpl.dimensions()[idxZ]); - internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper( - m_inputImpl.dimensions(), kernel_dims, indices); - - LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); - break; - } - - default: { - EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - eigen_assert(m_buf); - eigen_assert(index < m_dimensions.TotalSize()); - return m_buf[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const - { - eigen_assert(m_buf); - eigen_assert(index < m_dimensions.TotalSize()); - return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - 
costPerCoeff(bool vectorized) const { - // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost - // model. - const double kernel_size = m_kernelImpl.dimensions().TotalSize(); - // We ignore the use of fused multiply-add. - const double convolve_compute_cost = - TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>(); - const double firstIndex_compute_cost = - NumDims * - (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()); - return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + - kernel_size * (m_inputImpl.costPerCoeff(vectorized) + - m_kernelImpl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, convolve_compute_cost, vectorized, - PacketSize)); - } - - private: - // No assignment (copies are needed by the kernels) - TensorEvaluator& operator = (const TensorEvaluator&); - - TensorEvaluator<InputArgType, GpuDevice> m_inputImpl; - TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl; - KernelArgType m_kernelArg; - Indices m_indices; - Dimensions m_dimensions; - Scalar* m_buf; - const Scalar* m_kernel; - bool m_local_kernel; - - const GpuDevice& m_device; -}; -#endif - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h deleted file mode 100644 index 83c449c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +++ /dev/null @@ -1,212 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H -#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H - -namespace Eigen { - -/** \class TensorEvaluator - * \ingroup CXX11_Tensor_Module - * - * \brief A cost model used to limit the number of threads used for evaluating - * tensor expression. - * - */ - -// Class storing the cost of evaluating a tensor expression in terms of the -// estimated number of operand bytes loads, bytes stored, and compute cycles. -class TensorOpCost { - public: - // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple - // model based on minimal reciprocal throughput numbers from Intel or - // Agner Fog's tables would be better than what is there now. 
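[Editor's aside — not part of the deleted file. The TensorOpCost class being removed here is composed by the contraction and convolution evaluators above in their costPerCoeff() methods; the sketch below shows that composition pattern using only the constructors and members defined in this file. The load/store weights passed to total_cost() are arbitrary illustrative numbers, not values Eigen prescribes.]

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // Per-coefficient cost: one float load and two compute cycles...
  Eigen::TensorOpCost cost(/*bytes_loaded=*/sizeof(float),
                           /*bytes_stored=*/0,
                           /*compute_cycles=*/2);
  // ...plus one float store for the output, accumulated the way the
  // evaluators above chain their sub-expression costs.
  cost += Eigen::TensorOpCost(0, sizeof(float), 0);
  // Collapse to an aggregate cycle estimate with caller-chosen weights.
  const double cycles =
      cost.total_cost(/*load_cost=*/0.25, /*store_cost=*/0.25,
                      /*compute_cost=*/1.0);
  std::cout << cost << " -> " << cycles << " cycles/coeff\n";
  return 0;
}

[End of aside; the deleted TensorCostModel.h content continues below.]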
- template <typename ArgType> - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { - return internal::functor_traits< - internal::scalar_product_op<ArgType, ArgType> >::Cost; - } - template <typename ArgType> - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { - return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost; - } - template <typename ArgType> - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { - return internal::functor_traits< - internal::scalar_quotient_op<ArgType, ArgType> >::Cost; - } - template <typename ArgType> - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { - return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost; - } - template <typename SrcType, typename TargetType> - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { - return internal::functor_traits< - internal::scalar_cast_op<SrcType, TargetType> >::Cost; - } - - EIGEN_DEVICE_FUNC - TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} - EIGEN_DEVICE_FUNC - TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) - : bytes_loaded_(bytes_loaded), - bytes_stored_(bytes_stored), - compute_cycles_(compute_cycles) {} - - EIGEN_DEVICE_FUNC - TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, - bool vectorized, double packet_size) - : bytes_loaded_(bytes_loaded), - bytes_stored_(bytes_stored), - compute_cycles_(vectorized ? compute_cycles / packet_size - : compute_cycles) { - eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); - eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); - eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { - return bytes_loaded_; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const { - return bytes_stored_; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const { - return compute_cycles_; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost( - double load_cost, double store_cost, double compute_cost) const { - return load_cost * bytes_loaded_ + store_cost * bytes_stored_ + - compute_cost * compute_cycles_; - } - - // Drop memory access component. Intended for cases when memory accesses are - // sequential or are completely masked by computations. - EIGEN_DEVICE_FUNC void dropMemoryCost() { - bytes_loaded_ = 0; - bytes_stored_ = 0; - } - - // TODO(rmlarsen): Define min in terms of total cost, not elementwise. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( - const TensorOpCost& rhs) const { - double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); - double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); - double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); - return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); - } - - // TODO(rmlarsen): Define max in terms of total cost, not elementwise. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( - const TensorOpCost& rhs) const { - double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); - double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); - double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); - return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( - const TensorOpCost& rhs) { - bytes_loaded_ += rhs.bytes_loaded(); - bytes_stored_ += rhs.bytes_stored(); - compute_cycles_ += rhs.compute_cycles(); - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) { - bytes_loaded_ *= rhs; - bytes_stored_ *= rhs; - compute_cycles_ *= rhs; - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+( - TensorOpCost lhs, const TensorOpCost& rhs) { - lhs += rhs; - return lhs; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( - TensorOpCost lhs, double rhs) { - lhs *= rhs; - return lhs; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( - double lhs, TensorOpCost rhs) { - rhs *= lhs; - return rhs; - } - - friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) { - return os << "[bytes_loaded = " << tc.bytes_loaded() - << ", bytes_stored = " << tc.bytes_stored() - << ", compute_cycles = " << tc.compute_cycles() << "]"; - } - - private: - double bytes_loaded_; - double bytes_stored_; - double compute_cycles_; -}; - -// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of theads -// in [1:max_threads] instead of just switching multi-threading off for small -// work units. -template <typename Device> -class TensorCostModel { - public: - // Scaling from Eigen compute cost to device cycles. - static const int kDeviceCyclesPerComputeCycle = 1; - - // Costs in device cycles. - static const int kStartupCycles = 100000; - static const int kPerThreadCycles = 100000; - static const int kTaskSize = 40000; - - // Returns the number of threads in [1:max_threads] to use for - // evaluating an expression with the given output size and cost per - // coefficient. - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads( - double output_size, const TensorOpCost& cost_per_coeff, int max_threads) { - double cost = totalCost(output_size, cost_per_coeff); - int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9; - return numext::mini(max_threads, numext::maxi(1, threads)); - } - - // taskSize assesses parallel task size. - // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task - // granularity needs to be increased to mitigate parallelization overheads. - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize( - double output_size, const TensorOpCost& cost_per_coeff) { - return totalCost(output_size, cost_per_coeff) / kTaskSize; - } - - private: - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost( - double output_size, const TensorOpCost& cost_per_coeff) { - // Cost of memory fetches from L2 cache. 64 is typical cache line size. - // 11 is L2 cache latency on Haswell. - // We don't know whether data is in L1, L2 or L3. But we are most interested - // in single-threaded computational time around 100us-10ms (smaller time - // is too small for parallelization, larger time is not intersting - // either because we are probably using all available threads already). 
- // And for the target time range, L2 seems to be what matters. Data set - // fitting into L1 is too small to take noticeable time. Data set fitting - // only into L3 presumably will take more than 10ms to load and process. - const double kLoadCycles = 1.0 / 64 * 11; - const double kStoreCycles = 1.0 / 64 * 11; - // Scaling from Eigen compute cost to device cycles. - return output_size * - cost_per_coeff.total_cost(kLoadCycles, kStoreCycles, - kDeviceCyclesPerComputeCycle); - } -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h deleted file mode 100644 index e020d07..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ /dev/null @@ -1,313 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H -#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H - -namespace Eigen { - -/** \class TensorCustomUnaryOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor custom class. - * - * - */ -namespace internal { -template<typename CustomUnaryFunc, typename XprType> -struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > -{ - typedef typename XprType::Scalar Scalar; - typedef typename XprType::StorageKind StorageKind; - typedef typename XprType::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = traits<XprType>::Layout; -}; - -template<typename CustomUnaryFunc, typename XprType> -struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense> -{ - typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type; -}; - -template<typename CustomUnaryFunc, typename XprType> -struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> > -{ - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type; -}; - -} // end namespace internal - - - -template<typename CustomUnaryFunc, typename XprType> -class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename internal::nested<TensorCustomUnaryOp>::type Nested; - typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorCustomUnaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) - : m_expr(expr), m_func(func) {} - - EIGEN_DEVICE_FUNC - const CustomUnaryFunc& func() const { return m_func; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_expr; } - - protected: - typename XprType::Nested m_expr; - const CustomUnaryFunc m_func; -}; - - -// Eval as rvalue -template<typename CustomUnaryFunc, typename XprType, typename Device> -struct TensorEvaluator<const 
TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device> -{ - typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType; - typedef typename internal::traits<ArgType>::Index Index; - static const int NumDims = internal::traits<ArgType>::NumDimensions; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<XprType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) - : m_op(op), m_device(device), m_result(NULL) - { - m_dimensions = op.func().dimensions(op.expression()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<CoeffReturnType*>( - m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_result != NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } - - protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( - data, m_dimensions); - m_op.func().eval(m_op.expression(), result, m_device); - } - - Dimensions m_dimensions; - const ArgType m_op; - const Device& m_device; - CoeffReturnType* m_result; -}; - - - -/** \class TensorCustomBinaryOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor custom class. 
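A minimal sketch of the user-side functor consumed by the evaluator above: it must report the output dimensions and fill the result through the device. The functor name and the fixed rank-2 float shape are illustrative, and the customOp() entry point on TensorBase is assumed to be how the expression gets constructed in this version.

struct DoubleEverything {
  // Output has the same shape as the input in this example.
  Eigen::DSizes<Eigen::DenseIndex, 2> dimensions(const Eigen::Tensor<float, 2>& input) const {
    return Eigen::DSizes<Eigen::DenseIndex, 2>(input.dimension(0), input.dimension(1));
  }
  // Writes the result through the device so evaluation can run on CPU, GPU, thread pool, ...
  template <typename Output, typename Device>
  void eval(const Eigen::Tensor<float, 2>& input, Output& output, const Device& device) const {
    output.device(device) = input * 2.0f;
  }
};
// Assumed usage: Eigen::Tensor<float, 2> r = t.customOp(DoubleEverything());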
- * - * - */ -namespace internal { -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > -{ - typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, - typename RhsXprType::Scalar>::ret Scalar; - typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = traits<LhsXprType>::NumDimensions; - static const int Layout = traits<LhsXprType>::Layout; -}; - -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type; -}; - -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> > -{ - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> -class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors> -{ - public: - typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType; - typedef typename internal::nested<TensorCustomBinaryOp>::type Nested; - typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorCustomBinaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) - - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} - - EIGEN_DEVICE_FUNC - const CustomBinaryFunc& func() const { return m_func; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const CustomBinaryFunc m_func; -}; - - -// Eval as rvalue -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device> -struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device> -{ - typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType; - typedef typename internal::traits<XprType>::Index Index; - static const int NumDims = internal::traits<XprType>::NumDimensions; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef 
typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<LhsXprType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_op(op), m_device(device), m_result(NULL) - { - m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (data) { - evalTo(data); - return false; - } else { - m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); - evalTo(m_result); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_result != NULL) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_result[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_result + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } - - protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); - m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); - } - - Dimensions m_dimensions; - const XprType m_op; - const Device& m_device; - CoeffReturnType* m_result; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h deleted file mode 100644 index 29e50a3..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h +++ /dev/null @@ -1,68 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H - -namespace Eigen { - -/** \class TensorDevice - * \ingroup CXX11_Tensor_Module - * - * \brief Pseudo expression providing an operator = that will evaluate its argument - * on the specified computing 'device' (GPU, thread pool, ...) - * - * Example: - * C.device(EIGEN_GPU) = A + B; - * - * Todo: operator *= and /=. 
- */ - -template <typename ExpressionType, typename DeviceType> class TensorDevice { - public: - TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { - typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; - Assign assign(m_expression, other); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; - Sum sum(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Sum> Assign; - Assign assign(m_expression, sum); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - template<typename OtherDerived> - EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { - typedef typename OtherDerived::Scalar Scalar; - typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; - Difference difference(m_expression, other); - typedef TensorAssignOp<ExpressionType, const Difference> Assign; - Assign assign(m_expression, difference); - internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); - return *this; - } - - protected: - const DeviceType& m_device; - ExpressionType& m_expression; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h deleted file mode 100644 index 4f5767b..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ /dev/null @@ -1,337 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H - -namespace Eigen { - -static const int kCudaScratchSize = 1024; - -// This defines an interface that GPUDevice can take to use -// CUDA streams underneath. -class StreamInterface { - public: - virtual ~StreamInterface() {} - - virtual const cudaStream_t& stream() const = 0; - virtual const cudaDeviceProp& deviceProperties() const = 0; - - // Allocate memory on the actual device where the computation will run - virtual void* allocate(size_t num_bytes) const = 0; - virtual void deallocate(void* buffer) const = 0; - - // Return a scratchpad buffer of size 1k - virtual void* scratchpad() const = 0; - - // Return a semaphore. The semaphore is initially initialized to 0, and - // each kernel using it is responsible for resetting to 0 upon completion - // to maintain the invariant that the semaphore is always equal to 0 upon - // each kernel start. 
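As a usage illustration of the device() mechanism implemented by the TensorDevice class above, a small sketch assuming the Tensor module with EIGEN_USE_THREADS; the pool size and tensor shapes are arbitrary.

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

void add_on_pool() {
  Eigen::Tensor<float, 2> A(64, 64), B(64, 64), C(64, 64);
  A.setRandom();
  B.setRandom();
  Eigen::ThreadPool pool(4);                 // thread pool owned by the caller
  Eigen::ThreadPoolDevice device(&pool, 4);  // wraps the pool for tensor evaluation
  C.device(device) = A + B;                  // the assignment is evaluated on the pool
}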
- virtual unsigned int* semaphore() const = 0; -}; - -static cudaDeviceProp* m_deviceProperties; -static bool m_devicePropInitialized = false; - -static void initializeDeviceProp() { - if (!m_devicePropInitialized) { - // Attempts to ensure proper behavior in the case of multiple threads - // calling this function simultaneously. This would be trivial to - // implement if we could use std::mutex, but unfortunately mutex don't - // compile with nvcc, so we resort to atomics and thread fences instead. - // Note that if the caller uses a compiler that doesn't support c++11 we - // can't ensure that the initialization is thread safe. -#if __cplusplus >= 201103L - static std::atomic<bool> first(true); - if (first.exchange(false)) { -#else - static bool first = true; - if (first) { - first = false; -#endif - // We're the first thread to reach this point. - int num_devices; - cudaError_t status = cudaGetDeviceCount(&num_devices); - if (status != cudaSuccess) { - std::cerr << "Failed to get the number of CUDA devices: " - << cudaGetErrorString(status) - << std::endl; - assert(status == cudaSuccess); - } - m_deviceProperties = new cudaDeviceProp[num_devices]; - for (int i = 0; i < num_devices; ++i) { - status = cudaGetDeviceProperties(&m_deviceProperties[i], i); - if (status != cudaSuccess) { - std::cerr << "Failed to initialize CUDA device #" - << i - << ": " - << cudaGetErrorString(status) - << std::endl; - assert(status == cudaSuccess); - } - } - -#if __cplusplus >= 201103L - std::atomic_thread_fence(std::memory_order_release); -#endif - m_devicePropInitialized = true; - } else { - // Wait for the other thread to inititialize the properties. - while (!m_devicePropInitialized) { -#if __cplusplus >= 201103L - std::atomic_thread_fence(std::memory_order_acquire); -#endif - sleep(1); - } - } - } -} - -static const cudaStream_t default_stream = cudaStreamDefault; - -class CudaStreamDevice : public StreamInterface { - public: - // Use the default stream on the current device - CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { - cudaGetDevice(&device_); - initializeDeviceProp(); - } - // Use the default stream on the specified device - CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { - initializeDeviceProp(); - } - // Use the specified stream. Note that it's the - // caller responsibility to ensure that the stream can run on - // the specified device. If no device is specified the code - // assumes that the stream is associated to the current gpu device. 
- CudaStreamDevice(const cudaStream_t* stream, int device = -1) - : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { - if (device < 0) { - cudaGetDevice(&device_); - } else { - int num_devices; - cudaError_t err = cudaGetDeviceCount(&num_devices); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - assert(device < num_devices); - device_ = device; - } - initializeDeviceProp(); - } - - virtual ~CudaStreamDevice() { - if (scratch_) { - deallocate(scratch_); - } - } - - const cudaStream_t& stream() const { return *stream_; } - const cudaDeviceProp& deviceProperties() const { - return m_deviceProperties[device_]; - } - virtual void* allocate(size_t num_bytes) const { - cudaError_t err = cudaSetDevice(device_); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - void* result; - err = cudaMalloc(&result, num_bytes); - assert(err == cudaSuccess); - assert(result != NULL); - return result; - } - virtual void deallocate(void* buffer) const { - cudaError_t err = cudaSetDevice(device_); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - assert(buffer != NULL); - err = cudaFree(buffer); - assert(err == cudaSuccess); - } - - virtual void* scratchpad() const { - if (scratch_ == NULL) { - scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); - } - return scratch_; - } - - virtual unsigned int* semaphore() const { - if (semaphore_ == NULL) { - char* scratch = static_cast<char*>(scratchpad()) + kCudaScratchSize; - semaphore_ = reinterpret_cast<unsigned int*>(scratch); - cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - } - return semaphore_; - } - - private: - const cudaStream_t* stream_; - int device_; - mutable void* scratch_; - mutable unsigned int* semaphore_; -}; - -struct GpuDevice { - // The StreamInterface is not owned: the caller is - // responsible for its initialization and eventual destruction. - explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { - eigen_assert(stream); - } - explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { - eigen_assert(stream); - } - // TODO(bsteiner): This is an internal API, we should not expose it. 
- EIGEN_STRONG_INLINE const cudaStream_t& stream() const { - return stream_->stream(); - } - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return stream_->allocate(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - stream_->deallocate(buffer); - } - - EIGEN_STRONG_INLINE void* scratchpad() const { - return stream_->scratchpad(); - } - - EIGEN_STRONG_INLINE unsigned int* semaphore() const { - return stream_->semaphore(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, - stream_->stream()); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - cudaError_t err = - cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - } - - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - cudaError_t err = - cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { -#ifndef __CUDA_ARCH__ - cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); - EIGEN_UNUSED_VARIABLE(err) - assert(err == cudaSuccess); -#else - eigen_assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_STRONG_INLINE size_t numThreads() const { - // FIXME - return 32; - } - - EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { - // FIXME - return 48*1024; - } - - EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // We won't try to take advantage of the l2 cache for the time being, and - // there is no l3 cache on cuda devices. - return firstLevelCacheSize(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { -#if defined(__CUDACC__) && !defined(__CUDA_ARCH__) - cudaError_t err = cudaStreamSynchronize(stream_->stream()); - if (err != cudaSuccess) { - std::cerr << "Error detected in CUDA stream: " - << cudaGetErrorString(err) - << std::endl; - assert(err == cudaSuccess); - } -#else - assert(false && "The default device should be used instead to generate kernel code"); -#endif - } - - EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { - return stream_->deviceProperties().multiProcessorCount; - } - EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { - return stream_->deviceProperties().maxThreadsPerBlock; - } - EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { - return stream_->deviceProperties().maxThreadsPerMultiProcessor; - } - EIGEN_STRONG_INLINE int sharedMemPerBlock() const { - return stream_->deviceProperties().sharedMemPerBlock; - } - EIGEN_STRONG_INLINE int majorDeviceVersion() const { - return stream_->deviceProperties().major; - } - EIGEN_STRONG_INLINE int minorDeviceVersion() const { - return stream_->deviceProperties().minor; - } - - EIGEN_STRONG_INLINE int maxBlocks() const { - return max_blocks_; - } - - // This function checks if the CUDA runtime recorded an error for the - // underlying stream device. 
- inline bool ok() const { -#ifdef __CUDACC__ - cudaError_t error = cudaStreamQuery(stream_->stream()); - return (error == cudaSuccess) || (error == cudaErrorNotReady); -#else - return false; -#endif - } - - private: - const StreamInterface* stream_; - int max_blocks_; -}; - -#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ - (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ - assert(cudaGetLastError() == cudaSuccess); - - -// FIXME: Should be device and kernel specific. -#ifdef __CUDACC__ -static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { -#ifndef __CUDA_ARCH__ - cudaError_t status = cudaDeviceSetSharedMemConfig(config); - EIGEN_UNUSED_VARIABLE(status) - assert(status == cudaSuccess); -#else - EIGEN_UNUSED_VARIABLE(config) -#endif -} -#endif - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h deleted file mode 100644 index 9d14139..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +++ /dev/null @@ -1,81 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H - - -namespace Eigen { - -// Default device for the machine (typically a single cpu core) -struct DefaultDevice { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return 1; -#else - // Running on a CUDA device - return 32; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { -#ifndef __CUDA_ARCH__ - // Running on the host CPU - return l1CacheSize(); -#else - // Running on a CUDA device, return the amount of shared memory available. 
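For context, a hedged sketch of how the CudaStreamDevice/GpuDevice pair above is typically driven; it assumes compilation with nvcc and EIGEN_USE_GPU, and the device-pointer arguments are placeholders for memory already allocated on the GPU.

#define EIGEN_USE_GPU
#include <unsupported/Eigen/CXX11/Tensor>

void add_on_gpu(float* d_a, float* d_b, float* d_c, int n) {
  Eigen::CudaStreamDevice stream;    // default CUDA stream on the current device
  Eigen::GpuDevice device(&stream);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > A(d_a, n), B(d_b, n), C(d_c, n);
  C.device(device) = A + B;          // launches a kernel on the wrapped stream
  device.synchronize();              // wait for completion before reading d_c
}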
- return 48*1024; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - return l3CacheSize(); -#else - // Running on a CUDA device - return firstLevelCacheSize(); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { -#ifndef __CUDA_ARCH__ - // Running single threaded on the host CPU - // Should return an enum that encodes the ISA supported by the CPU - return 1; -#else - // Running on a CUDA device - return __CUDA_ARCH__ / 100; -#endif - } -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h deleted file mode 100644 index 7c03989..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ /dev/null @@ -1,122 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> - -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H - -namespace Eigen { -struct SyclDevice { - /// class members - /// sycl queue - mutable cl::sycl::queue m_queue; - /// std::map is the container used to make sure that we create only one buffer - /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice. - /// If a non-read-only pointer is needed to be accessed on the host we should manually deallocate it. - mutable std::map<const void *, std::shared_ptr<void>> buffer_map; - /// creating device by using selector - template<typename dev_Selector> SyclDevice(dev_Selector s) - : -#ifdef EIGEN_EXCEPTIONS - m_queue(cl::sycl::queue(s, [=](cl::sycl::exception_list l) { - for (const auto& e : l) { - try { - std::rethrow_exception(e); - } catch (cl::sycl::exception e) { - std::cout << e.what() << std::endl; - } - } - })) -#else - m_queue(cl::sycl::queue(s)) -#endif - {} - // destructor - ~SyclDevice() { deallocate_all(); } - - template <typename T> void deallocate(T *p) const { - auto it = buffer_map.find(p); - if (it != buffer_map.end()) { - buffer_map.erase(it); - internal::aligned_free(p); - } - } - void deallocate_all() const { - std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin(); - while (it!=buffer_map.end()) { - auto p=it->first; - buffer_map.erase(it); - internal::aligned_free(const_cast<void*>(p)); - it=buffer_map.begin(); - } - buffer_map.clear(); - } - - /// creation of sycl accessor for a buffer. This function first tries to find - /// the buffer in the buffer_map. If found it gets the accessor from it, if not, - ///the function then adds an entry by creating a sycl buffer for that particular pointer. 
- template <cl::sycl::access::mode AcMd, typename T> inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer> - get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const T * ptr) const { - return (get_sycl_buffer<T>(num_bytes, ptr)->template get_access<AcMd, cl::sycl::access::target::global_buffer>(cgh)); - } - - template<typename T> inline std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> add_sycl_buffer(const T *ptr, size_t num_bytes) const { - using Type = cl::sycl::buffer<T, 1>; - std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> ret = buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(ptr, std::shared_ptr<void>(new Type(cl::sycl::range<1>(num_bytes)), - [](void *dataMem) { delete static_cast<Type*>(dataMem); }))); - (static_cast<Type*>(buffer_map.at(ptr).get()))->set_final_data(nullptr); - return ret; - } - - template <typename T> inline cl::sycl::buffer<T, 1>* get_sycl_buffer(size_t num_bytes,const T * ptr) const { - return static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(ptr, num_bytes).first->second.get()); - } - - /// allocating memory on the cpu - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t) const { - return internal::aligned_malloc(8); - } - - // some runtime conditions that can be applied here - bool isDeviceSuitable() const { return true; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { - ::memcpy(dst, src, n); - } - - template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { - auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>(); - memcpy(host_acc.get_pointer(), src, n); - } - /// whith the current implementation of sycl, the data is copied twice from device to host. This will be fixed soon. - template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { - auto it = buffer_map.find(src); - if (it != buffer_map.end()) { - auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(it->second.get()))-> template get_access<cl::sycl::access::mode::read, cl::sycl::access::target::host_buffer>(); - memcpy(dst,host_acc.get_pointer(), n); - } else{ - eigen_assert("no device memory found. The memory might be destroyed before creation"); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - return 1; - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h deleted file mode 100644 index 17f0466..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ /dev/null @@ -1,282 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) -#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H - -namespace Eigen { - -// Use the SimpleThreadPool by default. We'll switch to the new non blocking -// thread pool later. -#ifndef EIGEN_USE_SIMPLE_THREAD_POOL -template <typename Env> using ThreadPoolTempl = NonBlockingThreadPoolTempl<Env>; -typedef NonBlockingThreadPool ThreadPool; -#else -template <typename Env> using ThreadPoolTempl = SimpleThreadPoolTempl<Env>; -typedef SimpleThreadPool ThreadPool; -#endif - - -// Barrier is an object that allows one or more threads to wait until -// Notify has been called a specified number of times. -class Barrier { - public: - Barrier(unsigned int count) : state_(count << 1), notified_(false) { - eigen_assert(((count << 1) >> 1) == count); - } - ~Barrier() { - eigen_assert((state_>>1) == 0); - } - - void Notify() { - unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; - if (v != 1) { - eigen_assert(((v + 2) & ~1) != 0); - return; // either count has not dropped to 0, or waiter is not waiting - } - std::unique_lock<std::mutex> l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void Wait() { - unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); - if ((v >> 1) == 0) return; - std::unique_lock<std::mutex> l(mu_); - while (!notified_) { - cv_.wait(l); - } - } - - private: - std::mutex mu_; - std::condition_variable cv_; - std::atomic<unsigned int> state_; // low bit is waiter flag - bool notified_; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object, -// but only one caller must call Notify() on the object. -struct Notification : Barrier { - Notification() : Barrier(1) {}; -}; - - -// Runs an arbitrary function and then calls Notify() on the passed in -// Notification. -template <typename Function, typename... Args> struct FunctionWrapperWithNotification -{ - static void run(Notification* n, Function f, Args... args) { - f(args...); - if (n) { - n->Notify(); - } - } -}; - -template <typename Function, typename... Args> struct FunctionWrapperWithBarrier -{ - static void run(Barrier* b, Function f, Args... args) { - f(args...); - if (b) { - b->Notify(); - } - } -}; - -template <typename SyncType> -static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { - if (n) { - n->Wait(); - } -} - - -// Build a thread pool device on top the an existing pool of threads. -struct ThreadPoolDevice { - // The ownership of the thread pool remains with the caller. 
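A minimal usage sketch of the Barrier helper defined above, assuming EIGEN_USE_THREADS and plain std::thread workers for illustration.

#include <thread>

void wait_for_two_workers() {
  Eigen::Barrier barrier(2);                        // expects two Notify() calls
  std::thread t1([&barrier] { /* ... work ... */ barrier.Notify(); });
  std::thread t2([&barrier] { /* ... work ... */ barrier.Notify(); });
  barrier.Wait();                                   // returns once both workers have notified
  t1.join();
  t2.join();
}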
- ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } - - EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); - } - - EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); - } - - EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { - ::memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { - memcpy(dst, src, n); - } - - EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { - ::memset(buffer, c, n); - } - - EIGEN_STRONG_INLINE int numThreads() const { - return num_threads_; - } - - EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { - return l1CacheSize(); - } - - EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { - // The l3 cache size is shared between all the cores. - return l3CacheSize() / num_threads_; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { - // Should return an enum that encodes the ISA supported by the CPU - return 1; - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { - Notification* n = new Notification(); - pool_->Schedule(std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...)); - return n; - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, - Function&& f, - Args&&... args) const { - pool_->Schedule(std::bind( - &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...)); - } - - template <class Function, class... Args> - EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - pool_->Schedule(std::bind(f, args...)); - } - - // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if - // called from one of the threads in pool_. Returns -1 otherwise. - EIGEN_STRONG_INLINE int currentThreadId() const { - return pool_->CurrentThreadId(); - } - - // parallelFor executes f with [0, n) arguments in parallel and waits for - // completion. F accepts a half-open interval [first, last). - // Block size is choosen based on the iteration cost and resulting parallel - // efficiency. If block_align is not nullptr, it is called to round up the - // block size. - void parallelFor(Index n, const TensorOpCost& cost, - std::function<Index(Index)> block_align, - std::function<void(Index, Index)> f) const { - typedef TensorCostModel<ThreadPoolDevice> CostModel; - if (n <= 1 || numThreads() == 1 || - CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) { - f(0, n); - return; - } - - // Calculate block size based on (1) the iteration cost and (2) parallel - // efficiency. We want blocks to be not too small to mitigate - // parallelization overheads; not too large to mitigate tail - // effect and potential load imbalance and we also want number - // of blocks to be evenly dividable across threads. 
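The block-size search in the code that follows measures parallel efficiency as the fraction of thread slots doing useful work; here is a standalone sketch of that metric with illustrative numbers (the helper name is not part of Eigen).

// block_count blocks spread over num_threads threads; divup rounds up.
inline double parallelEfficiencySketch(int block_count, int num_threads) {
  const int rounds = (block_count + num_threads - 1) / num_threads;
  return static_cast<double>(block_count) / (rounds * num_threads);
}
// e.g. with 4 threads: 10 blocks -> 10/12 ~ 0.83, 8 blocks -> 8/8 = 1.0;
// the loop below prefers the coarser blocking whenever its efficiency is not lower.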
- - double block_size_f = 1.0 / CostModel::taskSize(1, cost); - const Index max_oversharding_factor = 4; - Index block_size = numext::mini( - n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()), - block_size_f)); - const Index max_block_size = numext::mini(n, 2 * block_size); - if (block_align) { - Index new_block_size = block_align(block_size); - eigen_assert(new_block_size >= block_size); - block_size = numext::mini(n, new_block_size); - } - Index block_count = divup(n, block_size); - // Calculate parallel efficiency as fraction of total CPU time used for - // computations: - double max_efficiency = - static_cast<double>(block_count) / - (divup<int>(block_count, numThreads()) * numThreads()); - // Now try to increase block size up to max_block_size as long as it - // doesn't decrease parallel efficiency. - for (Index prev_block_count = block_count; - max_efficiency < 1.0 && prev_block_count > 1;) { - // This is the next block size that divides size into a smaller number - // of blocks than the current block_size. - Index coarser_block_size = divup(n, prev_block_count - 1); - if (block_align) { - Index new_block_size = block_align(coarser_block_size); - eigen_assert(new_block_size >= coarser_block_size); - coarser_block_size = numext::mini(n, new_block_size); - } - if (coarser_block_size > max_block_size) { - break; // Reached max block size. Stop. - } - // Recalculate parallel efficiency. - const Index coarser_block_count = divup(n, coarser_block_size); - eigen_assert(coarser_block_count < prev_block_count); - prev_block_count = coarser_block_count; - const double coarser_efficiency = - static_cast<double>(coarser_block_count) / - (divup<int>(coarser_block_count, numThreads()) * numThreads()); - if (coarser_efficiency + 0.01 >= max_efficiency) { - // Taking it. - block_size = coarser_block_size; - block_count = coarser_block_count; - if (max_efficiency < coarser_efficiency) { - max_efficiency = coarser_efficiency; - } - } - } - - // Recursively divide size into halves until we reach block_size. - // Division code rounds mid to block_size, so we are guaranteed to get - // block_count leaves that do actual computations. - Barrier barrier(static_cast<unsigned int>(block_count)); - std::function<void(Index, Index)> handleRange; - handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { - if (last - first <= block_size) { - // Single block or less, execute directly. - f(first, last); - barrier.Notify(); - return; - } - // Split into halves and submit to the pool. - Index mid = first + divup((last - first) / 2, block_size) * block_size; - pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); - pool_->Schedule([=, &handleRange]() { handleRange(first, mid); }); - }; - handleRange(0, n); - barrier.Wait(); - } - - // Convenience wrapper for parallelFor that does not align blocks. 
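And a usage sketch of parallelFor itself, through the two-argument convenience overload declared just below; the per-coefficient cost figures are assumptions chosen for a simple scaling loop.

void scale_in_parallel(const Eigen::ThreadPoolDevice& device, float* data, Eigen::Index n) {
  // One float loaded and one stored per coefficient, roughly one compute cycle.
  const Eigen::TensorOpCost cost(sizeof(float), sizeof(float), /*compute_cycles=*/1);
  device.parallelFor(n, cost, [data](Eigen::Index first, Eigen::Index last) {
    for (Eigen::Index i = first; i < last; ++i) data[i] *= 2.0f;
  });
}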
- void parallelFor(Index n, const TensorOpCost& cost, - std::function<void(Index, Index)> f) const { - parallelFor(n, cost, nullptr, std::move(f)); - } - - private: - ThreadPoolInterface* pool_; - int num_threads_; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h deleted file mode 100644 index 1a30e45..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h +++ /dev/null @@ -1,236 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H -#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H - -namespace Eigen { - -/** \internal - * - * \class TensorDimensionList - * \ingroup CXX11_Tensor_Module - * - * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. - * - * \sa Tensor - */ - -template <typename Index, std::size_t Rank> struct DimensionList { - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - const Index operator[] (const Index i) const { return i; } -}; - -namespace internal { - -template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > { - static const size_t value = Rank; -}; -template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > { - static const size_t value = Rank; -}; - -template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>&) { - return n; -} -template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>&) { - return n; -} - - -#if EIGEN_HAS_CONSTEXPR -template <typename Index, std::size_t Rank> -struct index_known_statically_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct index_known_statically_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct all_indices_known_statically_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_eq_impl<DimensionList<Index, Rank> > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i == value; - } -}; -template <typename Index, std::size_t Rank> -struct 
index_statically_eq_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i == value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_ne_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i != value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_ne_impl<const DimensionList<Index, Rank> > { - static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i != value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_gt_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i > value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_gt_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i > value; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_lt_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i < value; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_lt_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return i < value; - } -}; - -#else -template <typename Index, std::size_t Rank> -struct index_known_statically_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct index_known_statically_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct all_indices_known_statically_impl<DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > { - EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { - return true; - } -}; -template <typename Index, std::size_t Rank> -struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { - return true; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_eq_impl<DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_eq_impl<const DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_ne_impl<DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ - return false; - } -}; -template <typename Index, 
std::size_t Rank> -struct index_statically_ne_impl<const DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_gt_impl<DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_gt_impl<const DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; - -template <typename Index, std::size_t Rank> -struct index_statically_lt_impl<DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; -template <typename Index, std::size_t Rank> -struct index_statically_lt_impl<const DimensionList<Index, Rank> > { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { - return false; - } -}; -#endif - -} // end namespace internal -} // end namespace Eigen - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h deleted file mode 100644 index 451940d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ /dev/null @@ -1,428 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H -#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H - - -namespace Eigen { - -/** \internal - * - * \class TensorDimensions - * \ingroup CXX11_Tensor_Module - * - * \brief Set of classes used to encode and store the dimensions of a Tensor. - * - * The Sizes class encodes as part of the type the number of dimensions and the - * sizes corresponding to each dimension. It uses no storage space since it is - * entirely known at compile time. - * The DSizes class is its dynamic sibling: the number of dimensions is known - * at compile time but the sizes are set during execution. - * - * \sa Tensor - */ - -// Boilerplate code -namespace internal { - -template<std::size_t n, typename Dimension> struct dget { - static const std::size_t value = get<n, Dimension>::value; -}; - - -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct fixed_size_tensor_index_linearization_helper -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(array<Index, NumIndices> const& indices, - const Dimensions& dimensions) - { - return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) + - dget<RowMajor ? 
n - 1 : (NumIndices - n), Dimensions>::value * - fixed_size_tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(array<Index, NumIndices> const&, const Dimensions&) - { - return 0; - } -}; - -template<typename Index, std::size_t n> -struct fixed_size_tensor_index_extraction_helper -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(const Index index, - const Dimensions& dimensions) - { - const Index mult = (index == n-1) ? 1 : 0; - return array_get<n-1>(dimensions) * mult + - fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions); - } -}; - -template<typename Index> -struct fixed_size_tensor_index_extraction_helper<Index, 0> -{ - template <typename Dimensions> EIGEN_DEVICE_FUNC - static inline Index run(const Index, - const Dimensions&) - { - return 0; - } - }; - -} // end namespace internal - - -// Fixed size -#ifndef EIGEN_EMULATE_CXX11_META_H -template <typename std::ptrdiff_t... Indices> -struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> { - typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base; - static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { - return Base::count; - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() { - return internal::arg_prod(Indices...); - } - - EIGEN_DEVICE_FUNC Sizes() { } - template <typename DenseIndex> - explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) { - // todo: add assertion - } -#if EIGEN_HAS_VARIADIC_TEMPLATES - template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } - explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) { - // todo: add assertion - } -#endif - - template <typename T> Sizes& operator = (const T& /*other*/) { - // add assertion failure if the size of other is different - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { - return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, *this); - } - - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *static_cast<const Base*>(this)); - } - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *static_cast<const Base*>(this)); - } -}; - -namespace internal { -template <typename std::ptrdiff_t... 
Indices> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indices...>&) { - return Sizes<Indices...>::total_size; -} -} - -#else - -template <std::size_t n> -struct non_zero_size { - typedef internal::type2val<std::size_t, n> type; -}; -template <> -struct non_zero_size<0> { - typedef internal::null_type type; -}; - -template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { - typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; - static const size_t count = Base::count; - static const std::size_t total_size = internal::arg_prod<Base>::value; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return count; - } - - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { - return internal::arg_prod<Base>::value; - } - - Sizes() { } - template <typename DenseIndex> - explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) { - // todo: add assertion - } - template <typename T> Sizes& operator = (const T& /*other*/) { - // add assertion failure if the size of other is different - return *this; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { } - explicit Sizes(std::initializer_list<std::size_t>) { - // todo: add assertion - } -#else - EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { - } - EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) { - } - EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { - } - EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { - } - EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const { - switch (index) { - case 0: - return internal::get<0, Base>::value; - case 1: - return internal::get<1, Base>::value; - case 2: - return internal::get<2, Base>::value; - case 3: - return internal::get<3, Base>::value; - case 4: - return internal::get<4, Base>::value; - default: - eigen_assert(false && "index overflow"); - return static_cast<Index>(-1); - } - } - - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this)); - } - template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { - return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this)); - } -}; - -namespace internal { -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { - return Sizes<V1, V2, V3, V4, V5>::total_size; -} -} - -#endif - -// Boilerplate -namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct tensor_index_linearization_helper -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, 
NumIndices> const& indices, array<Index, NumIndices> const& dimensions) - { - return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + - array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * - tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const&) - { - return array_get<RowMajor ? 0 : NumIndices - 1>(indices); - } -}; -} // end namespace internal - - - -// Dynamic size -template <typename DenseIndex, int NumDims> -struct DSizes : array<DenseIndex, NumDims> { - typedef array<DenseIndex, NumDims> Base; - static const int count = NumDims; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { - return NumDims; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { - return (NumDims == 0) ? 1 : internal::array_prod(*static_cast<const Base*>(this)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { - for (int i = 0 ; i < NumDims; ++i) { - (*this)[i] = 0; - } - } - EIGEN_DEVICE_FUNC explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { } - - EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { - eigen_assert(NumDims == 1); - (*this)[0] = i0; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { - EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#else - EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { - eigen_assert(NumDims == 2); - (*this)[0] = i0; - (*this)[1] = i1; - } - EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { - eigen_assert(NumDims == 3); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - } - EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { - eigen_assert(NumDims == 4); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - } - EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { - eigen_assert(NumDims == 5); - (*this)[0] = i0; - (*this)[1] = i1; - (*this)[2] = i2; - (*this)[3] = i3; - (*this)[4] = i4; - } -#endif - - EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) { - *static_cast<Base*>(this) = other; - return *this; - } - - // A constexpr would be so much better here - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const { - return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this)); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const { - return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this)); - } -}; - - - - -// Boilerplate -namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> -struct tensor_vsize_index_linearization_helper 
-{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions) - { - return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) + - array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) * - tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions); - } -}; - -template<typename Index, std::size_t NumIndices, bool RowMajor> -struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> -{ - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&) - { - return array_get<RowMajor ? 0 : NumIndices - 1>(indices); - } -}; -} // end namespace internal - - -namespace internal { - -template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; -}; -template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; -}; -#ifndef EIGEN_EMULATE_CXX11_META_H -template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > { -static const std::ptrdiff_t value = Sizes<Indices...>::count; -}; -template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...> > { -static const std::ptrdiff_t value = Sizes<Indices...>::count; -}; -template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { - return get<n, internal::numeric_list<std::size_t, Indices...> >::value; -} -template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { - eigen_assert(false && "should never be called"); - return -1; -} -#else -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; -}; -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; -}; -template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { - return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; -} - -#endif - - -template <typename Dims1, typename Dims2, size_t n, size_t m> -struct sizes_match_below_dim { - static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { - return false; - } -}; -template <typename Dims1, typename Dims2, size_t n> -struct sizes_match_below_dim<Dims1, Dims2, n, n> { - static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) { - return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & - sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2); - } -}; -template <typename Dims1, typename Dims2> -struct sizes_match_below_dim<Dims1, Dims2, 0, 0> { - static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { - return true; - } -}; - -} // end namespace internal - - -template <typename Dims1, typename Dims2> -EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) { - return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2); -} - -} // end namespace Eigen - -#endif // 
EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h deleted file mode 100644 index 0698713..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ /dev/null @@ -1,181 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H -#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H - -namespace Eigen { - -/** \class TensorForcedEval - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. - * - * - */ -namespace internal { -template<typename XprType, template <class> class MakePointer_> -struct traits<TensorEvalToOp<XprType, MakePointer_> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0 - }; - template <class T> - struct MakePointer { - // Intermediate typedef to workaround MSVC issue. - typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - }; -}; - -template<typename XprType, template <class> class MakePointer_> -struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense> -{ - typedef const TensorEvalToOp<XprType, MakePointer_>& type; -}; - -template<typename XprType, template <class> class MakePointer_> -struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type> -{ - typedef TensorEvalToOp<XprType, MakePointer_> type; -}; - -} // end namespace internal - - - - -template<typename XprType, template <class> class MakePointer_> -class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename MakePointer_<CoeffReturnType>::Type PointerType; - typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested; - typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) - : m_xpr(expr), m_buffer(buffer) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } - - protected: - typename XprType::Nested m_xpr; - PointerType m_buffer; -}; - - - -template<typename ArgType, typename Device, template <class> class MakePointer_> -struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device> -{ - typedef 
TensorEvalToOp<ArgType, MakePointer_> XprType; - typedef typename ArgType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - typedef typename XprType::Index Index; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = true - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), - m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) - { } - - // Used for accessor extraction in SYCL Managed TensorMap: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { - return m_op; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { - } - - typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer; - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { - EIGEN_UNUSED_VARIABLE(scalar); - eigen_assert(scalar == NULL); - return m_impl.evalSubExprsIfNeeded(m_buffer); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { - m_buffer[i] = m_impl.coeff(i); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { - internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_buffer[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - // We assume that evalPacket or evalScalar is called to perform the - // assignment and account for the cost of the write here. 
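    // In other words, the reported cost is the nested expression's own cost plus
    // one store of sizeof(CoeffReturnType) bytes per coefficient (the
    // bytes-stored term of TensorOpCost); no extra compute cycles are charged
    // for the copy into the destination buffer.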
- return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } - ArgType expression() const { return m_expression; } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - /// added for sycl in order to construct the buffer from the sycl device - const Device& device() const{return m_device;} - - private: - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - DevicePointer m_buffer; - const XprType& m_op; - const ArgType m_expression; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h deleted file mode 100644 index 834ce07..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ /dev/null @@ -1,633 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H - -namespace Eigen { - -/** \class TensorEvaluator - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor evaluator classes. - * - * These classes are responsible for the evaluation of the tensor expression. - * - * TODO: add support for more types of expressions, in particular expressions - * leading to lvalues (slicing, reshaping, etc...) - */ - -// Generic evaluator -template<typename Derived, typename Device> -struct TensorEvaluator -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - // NumDimensions is -1 for variable dim tensors - static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? 
- internal::traits<Derived>::NumDimensions : 0; - - enum { - IsAligned = Derived::IsAligned, - PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), - Layout = Derived::Layout, - CoordAccess = NumCoords > 0, - RawAccess = true - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) - { } - - // Used for accessor extraction in SYCL Managed TensorMap: - const Derived& derived() const { return m_impl; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { - if (dest) { - m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); - return false; - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); - return m_data[index]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - eigen_assert(m_data); - return m_data[index]; - } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { - eigen_assert(m_data); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return m_data[m_dims.IndexOfColMajor(coords)]; - } else { - return m_data[m_dims.IndexOfRowMajor(coords)]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) { - eigen_assert(m_data); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return m_data[m_dims.IndexOfColMajor(coords)]; - } else { - return m_data[m_dims.IndexOfRowMajor(coords)]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits<PacketReturnType>::size); - } - - EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; } - - /// required by sycl in order to construct sycl buffer from raw pointer - const Device& device() const{return m_device;} - - protected: - typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data; - Dimensions m_dims; - const Device& m_device; - const Derived& m_impl; -}; - -namespace { -template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T loadConstant(const T* address) { - return *address; -} -// Use the texture cache on CUDA devices whenever possible -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 -template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float loadConstant(const float* address) { - return __ldg(address); -} -template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double loadConstant(const double* address) { - return __ldg(address); -} -template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -Eigen::half loadConstant(const Eigen::half* address) { - 
return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); -} -#endif -} - - -// Default evaluator for rvalues -template<typename Derived, typename Device> -struct TensorEvaluator<const Derived, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - // NumDimensions is -1 for variable dim tensors - static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? - internal::traits<Derived>::NumDimensions : 0; - - enum { - IsAligned = Derived::IsAligned, - PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), - Layout = Derived::Layout, - CoordAccess = NumCoords > 0, - RawAccess = true - }; - - // Used for accessor extraction in SYCL Managed TensorMap: - const Derived& derived() const { return m_impl; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) { - m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); - return false; - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - eigen_assert(m_data); - return loadConstant(m_data+index); - } - - template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { - eigen_assert(m_data); - const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 
m_dims.IndexOfColMajor(coords) - : m_dims.IndexOfRowMajor(coords); - return loadConstant(m_data+index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits<PacketReturnType>::size); - } - - EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; } - - /// added for sycl in order to construct the buffer from the sycl device - const Device& device() const{return m_device;} - - protected: - typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data; - Dimensions m_dims; - const Device& m_device; - const Derived& m_impl; -}; - - - - -// -------------------- CwiseNullaryOp -------------------- - -template<typename NullaryOp, typename ArgType, typename Device> -struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> -{ - typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; - - enum { - IsAligned = true, - PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC - TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_wrapper(m_functor, index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, - internal::unpacket_traits<PacketReturnType>::size); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; } - /// required by sycl in order to extract the accessor - NullaryOp functor() const { return m_functor; } - - - private: - const NullaryOp m_functor; - TensorEvaluator<ArgType, Device> m_argImpl; - const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper; -}; - - - -// -------------------- CwiseUnaryOp -------------------- - -template<typename UnaryOp, typename ArgType, typename Device> -struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device> -{ - typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess, - Layout = 
TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), - m_argImpl(op.nestedExpression(), device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_argImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_argImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(m_argImpl.coeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - const double functor_cost = internal::functor_traits<UnaryOp>::Cost; - return m_argImpl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; } - /// added for sycl in order to construct the buffer from sycl device - UnaryOp functor() const { return m_functor; } - - - private: - const UnaryOp m_functor; - TensorEvaluator<ArgType, Device> m_argImpl; -}; - - -// -------------------- CwiseBinaryOp -------------------- - -template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device> -struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device> -{ - typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType; - - enum { - IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess & - internal::functor_traits<BinaryOp>::PacketAccess, - Layout = TensorEvaluator<LeftArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), - m_leftImpl(op.lhsExpression(), device), - m_rightImpl(op.rhsExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = 
internal::unpacket_traits<PacketReturnType>::size; - typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use right impl instead if right impl dimensions are known at compile time. - return m_leftImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_leftImpl.evalSubExprsIfNeeded(NULL); - m_rightImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_leftImpl.cleanup(); - m_rightImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double functor_cost = internal::functor_traits<BinaryOp>::Cost; - return m_leftImpl.costPerCoeff(vectorized) + - m_rightImpl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; } - /// required by sycl in order to extract the accessor - BinaryOp functor() const { return m_functor; } - - private: - const BinaryOp m_functor; - TensorEvaluator<LeftArgType, Device> m_leftImpl; - TensorEvaluator<RightArgType, Device> m_rightImpl; -}; - -// -------------------- CwiseTernaryOp -------------------- - -template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device> -struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device> -{ - typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType; - - enum { - IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned, - PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess & - internal::functor_traits<TernaryOp>::PacketAccess, - Layout = TensorEvaluator<Arg1Type, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), - m_arg1Impl(op.arg1Expression(), device), - m_arg2Impl(op.arg2Expression(), device), - m_arg3Impl(op.arg3Expression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); - - EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, - typename internal::traits<Arg2Type>::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind, - typename 
internal::traits<Arg3Type>::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, - typename internal::traits<Arg2Type>::Index>::value), - STORAGE_INDEX_MUST_MATCH) - EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index, - typename internal::traits<Arg3Type>::Index>::value), - STORAGE_INDEX_MUST_MATCH) - - eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use arg2 or arg3 dimensions if they are known at compile time. - return m_arg1Impl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_arg1Impl.evalSubExprsIfNeeded(NULL); - m_arg2Impl.evalSubExprsIfNeeded(NULL); - m_arg3Impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_arg1Impl.cleanup(); - m_arg2Impl.cleanup(); - m_arg3Impl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index), - m_arg2Impl.template packet<LoadMode>(index), - m_arg3Impl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double functor_cost = internal::functor_traits<TernaryOp>::Cost; - return m_arg1Impl.costPerCoeff(vectorized) + - m_arg2Impl.costPerCoeff(vectorized) + - m_arg3Impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } - - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; } - - private: - const TernaryOp m_functor; - TensorEvaluator<Arg1Type, Device> m_arg1Impl; - TensorEvaluator<Arg2Type, Device> m_arg2Impl; - TensorEvaluator<Arg3Type, Device> m_arg3Impl; -}; - - -// -------------------- SelectOp -------------------- - -template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device> -struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device> -{ - typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType; - typedef typename XprType::Scalar Scalar; - - enum { - IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & 
TensorEvaluator<ElseArgType, Device>::PacketAccess & - internal::packet_traits<Scalar>::HasBlend, - Layout = TensorEvaluator<IfArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_condImpl(op.ifExpression(), device), - m_thenImpl(op.thenExpression(), device), - m_elseImpl(op.elseExpression(), device) - { - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); - eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename internal::traits<XprType>::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const - { - // TODO: use then or else impl instead if they happen to be known at compile time. - return m_condImpl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_condImpl.evalSubExprsIfNeeded(NULL); - m_thenImpl.evalSubExprsIfNeeded(NULL); - m_elseImpl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_condImpl.cleanup(); - m_thenImpl.cleanup(); - m_elseImpl.cleanup(); - } - - EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const - { - return m_condImpl.coeff(index) ? 
m_thenImpl.coeff(index) : m_elseImpl.coeff(index); - } - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const - { - internal::Selector<PacketSize> select; - for (Index i = 0; i < PacketSize; ++i) { - select.select[i] = m_condImpl.coeff(index+i); - } - return internal::pblend(select, - m_thenImpl.template packet<LoadMode>(index), - m_elseImpl.template packet<LoadMode>(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - return m_condImpl.costPerCoeff(vectorized) + - m_thenImpl.costPerCoeff(vectorized) - .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; } - - private: - TensorEvaluator<IfArgType, Device> m_condImpl; - TensorEvaluator<ThenArgType, Device> m_thenImpl; - TensorEvaluator<ElseArgType, Device> m_elseImpl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h deleted file mode 100644 index f01d77c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ /dev/null @@ -1,288 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H - -namespace Eigen { - -/** \class TensorExecutor - * \ingroup CXX11_Tensor_Module - * - * \brief The tensor executor class. - * - * This class is responsible for launch the evaluation of the expression on - * the specified computing device. - */ -namespace internal { - -// Default strategy: the expression is evaluated with a single cpu thread. 
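// A minimal usage sketch (hypothetical snippet, assuming
// #include <unsupported/Eigen/CXX11/Tensor>): a plain tensor assignment is
// what ultimately reaches TensorExecutor::run; with no device specified,
// the DefaultDevice specializations below are selected.
//
//   Eigen::Tensor<float, 2> a(64, 32), b(64, 32), c(64, 32);
//   a.setRandom();
//   b.setRandom();
//   // Builds a TensorCwiseBinaryOp expression and evaluates it coefficient by
//   // coefficient, or packet by packet when vectorization is possible.
//   c = a + b * 0.5f;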
-template<typename Expression, typename Device, bool Vectorizable> -class TensorExecutor -{ - public: - typedef typename Expression::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, const Device& device = Device()) - { - TensorEvaluator<Expression, Device> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); - for (Index i = 0; i < size; ++i) { - evaluator.evalScalar(i); - } - } - evaluator.cleanup(); - } -}; - - -template<typename Expression> -class TensorExecutor<Expression, DefaultDevice, true> -{ - public: - typedef typename Expression::Index Index; - EIGEN_DEVICE_FUNC - static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) - { - TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); - const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size; - // Give the compiler a strong hint to unroll the loop. But don't insist - // on unrolling, because if the function is expensive the compiler should not - // unroll the loop at the expense of inlining. - const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; - for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { - for (Index j = 0; j < 4; j++) { - evaluator.evalPacket(i + j * PacketSize); - } - } - const Index VectorizedSize = (size / PacketSize) * PacketSize; - for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { - evaluator.evalPacket(i); - } - for (Index i = VectorizedSize; i < size; ++i) { - evaluator.evalScalar(i); - } - } - evaluator.cleanup(); - } -}; - - - -// Multicore strategy: the index space is partitioned and each partition is executed on a single core -#ifdef EIGEN_USE_THREADS -template <typename Evaluator, typename Index, bool Vectorizable> -struct EvalRange { - static void run(Evaluator* evaluator_in, const Index first, const Index last) { - Evaluator evaluator = *evaluator_in; - eigen_assert(last >= first); - for (Index i = first; i < last; ++i) { - evaluator.evalScalar(i); - } - } - - static Index alignBlockSize(Index size) { - return size; - } -}; - -template <typename Evaluator, typename Index> -struct EvalRange<Evaluator, Index, true> { - static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; - - static void run(Evaluator* evaluator_in, const Index first, const Index last) { - Evaluator evaluator = *evaluator_in; - eigen_assert(last >= first); - Index i = first; - if (last - first >= PacketSize) { - eigen_assert(first % PacketSize == 0); - Index last_chunk_offset = last - 4 * PacketSize; - // Give the compiler a strong hint to unroll the loop. But don't insist - // on unrolling, because if the function is expensive the compiler should not - // unroll the loop at the expense of inlining. - for (; i <= last_chunk_offset; i += 4*PacketSize) { - for (Index j = 0; j < 4; j++) { - evaluator.evalPacket(i + j * PacketSize); - } - } - last_chunk_offset = last - PacketSize; - for (; i <= last_chunk_offset; i += PacketSize) { - evaluator.evalPacket(i); - } - } - for (; i < last; ++i) { - evaluator.evalScalar(i); - } - } - - static Index alignBlockSize(Index size) { - // Align block size to packet size and account for unrolling in run above. 
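    // For example, with PacketSize == 4: a size of 1000 (>= 16 * PacketSize) is
    // rounded up to the next multiple of 4 * PacketSize, (1000 + 15) & ~15 == 1008,
    // while a size of 50 falls through to the next multiple of PacketSize,
    // (50 + 3) & ~3 == 52.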
- if (size >= 16 * PacketSize) { - return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); - } - // Aligning to 4 * PacketSize would increase block size by more than 25%. - return (size + PacketSize - 1) & ~(PacketSize - 1); - } -}; - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> { - public: - typedef typename Expression::Index Index; - static inline void run(const Expression& expr, const ThreadPoolDevice& device) - { - typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator; - Evaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) - { - const Index size = array_prod(evaluator.dimensions()); -#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) - device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), - EvalRange<Evaluator, Index, Vectorizable>::alignBlockSize, - [&evaluator](Index first, Index last) { - EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, first, last); - }); -#else - size_t num_threads = device.numThreads(); - if (num_threads > 1) { - num_threads = TensorCostModel<ThreadPoolDevice>::numThreads( - size, evaluator.costPerCoeff(Vectorizable), num_threads); - } - if (num_threads == 1) { - EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, 0, size); - } else { - const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1; - Index blocksz = std::ceil<Index>(static_cast<float>(size)/num_threads) + PacketSize - 1; - const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize))); - const Index numblocks = size / blocksize; - - Barrier barrier(numblocks); - for (int i = 0; i < numblocks; ++i) { - device.enqueue_with_barrier( - &barrier, &EvalRange<Evaluator, Index, Vectorizable>::run, - &evaluator, i * blocksize, (i + 1) * blocksize); - } - if (numblocks * blocksize < size) { - EvalRange<Evaluator, Index, Vectorizable>::run( - &evaluator, numblocks * blocksize, size); - } - barrier.Wait(); - } -#endif // defined(!EIGEN_USE_SIMPLE_THREAD_POOL) - } - evaluator.cleanup(); - } -}; -#endif // EIGEN_USE_THREADS - - -// GPU: the evaluation of the expression is offloaded to a GPU. 
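// A usage sketch for this path (hypothetical snippet; d_in/d_out are device
// pointers allocated elsewhere): with EIGEN_USE_GPU defined and CUDA
// available, routing an assignment through .device(gpu) dispatches to
// TensorExecutor<Expression, GpuDevice, Vectorizable>::run, which launches
// EigenMetaKernel below.
//
//   Eigen::CudaStreamDevice stream;                   // default CUDA stream
//   Eigen::GpuDevice gpu(&stream);
//   Eigen::TensorMap<Eigen::Tensor<float, 1> > in(d_in, n);
//   Eigen::TensorMap<Eigen::Tensor<float, 1> > out(d_out, n);
//   out.device(gpu) = in * 2.0f;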
-#if defined(EIGEN_USE_GPU) - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, GpuDevice, Vectorizable> { - public: - typedef typename Expression::Index Index; - static void run(const Expression& expr, const GpuDevice& device); -}; - - -#if defined(__CUDACC__) -template <typename Evaluator, typename Index, bool Vectorizable> -struct EigenMetaKernelEval { - static __device__ EIGEN_ALWAYS_INLINE - void run(Evaluator& eval, Index first, Index last, Index step_size) { - for (Index i = first; i < last; i += step_size) { - eval.evalScalar(i); - } - } -}; - -template <typename Evaluator, typename Index> -struct EigenMetaKernelEval<Evaluator, Index, true> { - static __device__ EIGEN_ALWAYS_INLINE - void run(Evaluator& eval, Index first, Index last, Index step_size) { - const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size; - const Index vectorized_size = (last / PacketSize) * PacketSize; - const Index vectorized_step_size = step_size * PacketSize; - - // Use the vector path - for (Index i = first * PacketSize; i < vectorized_size; - i += vectorized_step_size) { - eval.evalPacket(i); - } - for (Index i = vectorized_size + first; i < last; i += step_size) { - eval.evalScalar(i); - } - } -}; - -template <typename Evaluator, typename Index> -__global__ void -__launch_bounds__(1024) -EigenMetaKernel(Evaluator eval, Index size) { - - const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; - const Index step_size = blockDim.x * gridDim.x; - - const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; - EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size); -} - -/*static*/ -template <typename Expression, bool Vectorizable> -inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run( - const Expression& expr, const GpuDevice& device) { - TensorEvaluator<Expression, GpuDevice> evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - const int block_size = device.maxCudaThreadsPerBlock(); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const Index size = array_prod(evaluator.dimensions()); - // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. - const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1); - - LAUNCH_CUDA_KERNEL( - (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>), - num_blocks, block_size, 0, device, evaluator, size); - } - evaluator.cleanup(); -} - -#endif // __CUDACC__ -#endif // EIGEN_USE_GPU - -// SYCL Executor policy -#ifdef EIGEN_USE_SYCL - -template <typename Expression, bool Vectorizable> -class TensorExecutor<Expression, SyclDevice, Vectorizable> { -public: - static inline void run(const Expression &expr, const SyclDevice &device) { - // call TensorSYCL module - TensorSycl::run(expr, device); - } -}; - -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h deleted file mode 100644 index 85dfc7a..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ /dev/null @@ -1,371 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H -#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H - -namespace Eigen { - -/** \class TensorExpr - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor expression classes. - * - * The TensorCwiseNullaryOp class applies a nullary operators to an expression. - * This is typically used to generate constants. - * - * The TensorCwiseUnaryOp class represents an expression where a unary operator - * (e.g. cwiseSqrt) is applied to an expression. - * - * The TensorCwiseBinaryOp class represents an expression where a binary - * operator (e.g. addition) is applied to a lhs and a rhs expression. - * - */ -namespace internal { -template<typename NullaryOp, typename XprType> -struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> > - : traits<XprType> -{ - typedef traits<XprType> XprTraits; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::Nested XprTypeNested; - typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0 - }; -}; - -} // end namespace internal - - - -template<typename NullaryOp, typename XprType> -class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef TensorCwiseNullaryOp<NullaryOp, XprType> Nested; - typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) - : m_xpr(xpr), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - nestedExpression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - const NullaryOp& functor() const { return m_functor; } - - protected: - typename XprType::Nested m_xpr; - const NullaryOp m_functor; -}; - - - -namespace internal { -template<typename UnaryOp, typename XprType> -struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> > - : traits<XprType> -{ - // TODO(phli): Add InputScalar, InputPacket. Check references to - // current Scalar/Packet to see if the intent is Input or Output. 
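  // The expression's Scalar is whatever the functor returns for the input
  // Scalar; for instance, an absolute-value functor applied to a
  // std::complex<float> argument yields a float-valued expression.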
- typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprType::Nested XprTypeNested; - typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename UnaryOp, typename XprType> -struct eval<TensorCwiseUnaryOp<UnaryOp, XprType>, Eigen::Dense> -{ - typedef const TensorCwiseUnaryOp<UnaryOp, XprType>& type; -}; - -template<typename UnaryOp, typename XprType> -struct nested<TensorCwiseUnaryOp<UnaryOp, XprType>, 1, typename eval<TensorCwiseUnaryOp<UnaryOp, XprType> >::type> -{ - typedef TensorCwiseUnaryOp<UnaryOp, XprType> type; -}; - -} // end namespace internal - - - -template<typename UnaryOp, typename XprType> -class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors> -{ - public: - // TODO(phli): Add InputScalar, InputPacket. Check references to - // current Scalar/Packet to see if the intent is Input or Output. - typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef Scalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested; - typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) - : m_xpr(xpr), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const UnaryOp& functor() const { return m_functor; } - - /** \returns the nested expression */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - nestedExpression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const UnaryOp m_functor; -}; - - -namespace internal { -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs - // are different. - // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to - // current Scalar/Packet to see if the intent is Inputs or Output. 
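  // The result Scalar is whatever the binary functor returns when applied to
  // the two operand Scalars, so a functor mixing, say, float and double
  // operands yields a double-valued expression (assuming the functor itself
  // performs the usual promotion).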
- typedef typename result_of< - BinaryOp(typename LhsXprType::Scalar, - typename RhsXprType::Scalar)>::type Scalar; - typedef traits<LhsXprType> XprTraits; - typedef typename promote_storage_type< - typename traits<LhsXprType>::StorageKind, - typename traits<RhsXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type< - typename traits<LhsXprType>::Index, - typename traits<RhsXprType>::Index>::type Index; - typedef typename LhsXprType::Nested LhsNested; - typedef typename RhsXprType::Nested RhsNested; - typedef typename remove_reference<LhsNested>::type _LhsNested; - typedef typename remove_reference<RhsNested>::type _RhsNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0 - }; -}; - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, Eigen::Dense> -{ - typedef const TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>& type; -}; - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -struct nested<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, 1, typename eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >::type> -{ - typedef TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> type; -}; - -} // end namespace internal - - - -template<typename BinaryOp, typename LhsXprType, typename RhsXprType> -class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors> -{ - public: - // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to - // current Scalar/Packet to see if the intent is Inputs or Output. - typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef Scalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested; - typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) - : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const BinaryOp& functor() const { return m_functor; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename LhsXprType::Nested>::type& - lhsExpression() const { return m_lhs_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename RhsXprType::Nested>::type& - rhsExpression() const { return m_rhs_xpr; } - - protected: - typename LhsXprType::Nested m_lhs_xpr; - typename RhsXprType::Nested m_rhs_xpr; - const BinaryOp m_functor; -}; - - -namespace internal { -template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> -struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> > -{ - // Type promotion to handle the case where the types of the args are different. 
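  // As in the binary case, the Scalar is deduced from the functor's return
  // type when applied to the three argument Scalars.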
- typedef typename result_of< - TernaryOp(typename Arg1XprType::Scalar, - typename Arg2XprType::Scalar, - typename Arg3XprType::Scalar)>::type Scalar; - typedef traits<Arg1XprType> XprTraits; - typedef typename traits<Arg1XprType>::StorageKind StorageKind; - typedef typename traits<Arg1XprType>::Index Index; - typedef typename Arg1XprType::Nested Arg1Nested; - typedef typename Arg2XprType::Nested Arg2Nested; - typedef typename Arg3XprType::Nested Arg3Nested; - typedef typename remove_reference<Arg1Nested>::type _Arg1Nested; - typedef typename remove_reference<Arg2Nested>::type _Arg2Nested; - typedef typename remove_reference<Arg3Nested>::type _Arg3Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0 - }; -}; - -template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> -struct eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, Eigen::Dense> -{ - typedef const TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>& type; -}; - -template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> -struct nested<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, 1, typename eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >::type> -{ - typedef TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> type; -}; - -} // end namespace internal - - - -template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> -class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef Scalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorCwiseTernaryOp>::type Nested; - typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) - : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} - - EIGEN_DEVICE_FUNC - const TernaryOp& functor() const { return m_functor; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename Arg1XprType::Nested>::type& - arg1Expression() const { return m_arg1_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename Arg2XprType::Nested>::type& - arg2Expression() const { return m_arg2_xpr; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename Arg3XprType::Nested>::type& - arg3Expression() const { return m_arg3_xpr; } - - protected: - typename Arg1XprType::Nested m_arg1_xpr; - typename Arg2XprType::Nested m_arg2_xpr; - typename Arg3XprType::Nested m_arg3_xpr; - const TernaryOp m_functor; -}; - - -namespace internal { -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> > - : traits<ThenXprType> -{ - typedef typename traits<ThenXprType>::Scalar Scalar; - typedef traits<ThenXprType> XprTraits; - typedef typename promote_storage_type<typename traits<ThenXprType>::StorageKind, - 
typename traits<ElseXprType>::StorageKind>::ret StorageKind; - typedef typename promote_index_type<typename traits<ElseXprType>::Index, - typename traits<ThenXprType>::Index>::type Index; - typedef typename IfXprType::Nested IfNested; - typedef typename ThenXprType::Nested ThenNested; - typedef typename ElseXprType::Nested ElseNested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -struct eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, Eigen::Dense> -{ - typedef const TensorSelectOp<IfXprType, ThenXprType, ElseXprType>& type; -}; - -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -struct nested<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, 1, typename eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >::type> -{ - typedef TensorSelectOp<IfXprType, ThenXprType, ElseXprType> type; -}; - -} // end namespace internal - - -template<typename IfXprType, typename ThenXprType, typename ElseXprType> -class TensorSelectOp : public TensorBase<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorSelectOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::promote_storage_type<typename ThenXprType::CoeffReturnType, - typename ElseXprType::CoeffReturnType>::ret CoeffReturnType; - typedef typename Eigen::internal::nested<TensorSelectOp>::type Nested; - typedef typename Eigen::internal::traits<TensorSelectOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorSelectOp>::Index Index; - - EIGEN_DEVICE_FUNC - TensorSelectOp(const IfXprType& a_condition, - const ThenXprType& a_then, - const ElseXprType& a_else) - : m_condition(a_condition), m_then(a_then), m_else(a_else) - { } - - EIGEN_DEVICE_FUNC - const IfXprType& ifExpression() const { return m_condition; } - - EIGEN_DEVICE_FUNC - const ThenXprType& thenExpression() const { return m_then; } - - EIGEN_DEVICE_FUNC - const ElseXprType& elseExpression() const { return m_else; } - - protected: - typename IfXprType::Nested m_condition; - typename ThenXprType::Nested m_then; - typename ElseXprType::Nested m_else; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h deleted file mode 100644 index 08eb559..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ /dev/null @@ -1,651 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H -#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H - -// This code requires the ability to initialize arrays of constant -// values directly inside a class. -#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900 - -namespace Eigen { - -/** \class TensorFFT - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor FFT class. 
- * - * TODO: - * Vectorize the Cooley Tukey and the Bluestein algorithm - * Add support for multithreaded evaluation - * Improve the performance on GPU - */ - -template <bool NeedUprade> struct MakeComplex { - template <typename T> - EIGEN_DEVICE_FUNC - T operator() (const T& val) const { return val; } -}; - -template <> struct MakeComplex<true> { - template <typename T> - EIGEN_DEVICE_FUNC - std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); } -}; - -template <> struct MakeComplex<false> { - template <typename T> - EIGEN_DEVICE_FUNC - std::complex<T> operator() (const std::complex<T>& val) const { return val; } -}; - -template <int ResultType> struct PartOf { - template <typename T> T operator() (const T& val) const { return val; } -}; - -template <> struct PartOf<RealPart> { - template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); } -}; - -template <> struct PartOf<ImagPart> { - template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); } -}; - -namespace internal { -template <typename FFT, typename XprType, int FFTResultType, int FFTDir> -struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> { - typedef traits<XprType> XprTraits; - typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename XprTraits::Scalar InputScalar; - typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> -struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> { - typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type; -}; - -template <typename FFT, typename XprType, int FFTResultType, int FFTDirection> -struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> { - typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type; -}; - -} // end namespace internal - -template <typename FFT, typename XprType, int FFTResultType, int FFTDir> -class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> { - public: - typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef OutputScalar CoeffReturnType; - typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested; - typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) - : m_xpr(expr), m_fft(fft) {} - - EIGEN_DEVICE_FUNC - const FFT& fft() const { return m_fft; } - - EIGEN_DEVICE_FUNC - const typename 
internal::remove_all<typename XprType::Nested>::type& expression() const { - return m_xpr; - } - - protected: - typename XprType::Nested m_xpr; - const FFT m_fft; -}; - -// Eval as rvalue -template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir> -struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> { - typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - typedef internal::traits<XprType> XprTraits; - typedef typename XprTraits::Scalar InputScalar; - typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar; - typedef OutputScalar CoeffReturnType; - typedef typename PacketType<OutputScalar, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = true, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i]; - } - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; - } - } - m_size = m_dimensions.TotalSize(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_dimensions; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { - m_impl.evalSubExprsIfNeeded(NULL); - if (data) { - evalToBuf(data); - return false; - } else { - m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); - evalToBuf(m_data); - return true; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_data) { - m_device.deallocate(m_data); - m_data = NULL; - } - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index]; - } - - template <int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType - packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_data + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } - - - private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { - const bool write_to_out = internal::is_same<OutputScalar, 
ComplexScalar>::value; - ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); - - for (Index i = 0; i < m_size; ++i) { - buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i)); - } - - for (size_t i = 0; i < m_fft.size(); ++i) { - Index dim = m_fft[i]; - eigen_assert(dim >= 0 && dim < NumDims); - Index line_len = m_dimensions[dim]; - eigen_assert(line_len >= 1); - ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); - const bool is_power_of_two = isPowerOfTwo(line_len); - const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); - const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); - - ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); - ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); - ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); - if (!is_power_of_two) { - // Compute twiddle factors - // t_n = exp(sqrt(-1) * pi * n^2 / line_len) - // for n = 0, 1,..., line_len-1. - // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 - pos_j_base_powered[0] = ComplexScalar(1, 0); - if (line_len > 1) { - const RealScalar pi_over_len(EIGEN_PI / line_len); - const ComplexScalar pos_j_base = ComplexScalar( - std::cos(pi_over_len), std::sin(pi_over_len)); - pos_j_base_powered[1] = pos_j_base; - if (line_len > 2) { - const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; - for (int j = 2; j < line_len + 1; ++j) { - pos_j_base_powered[j] = pos_j_base_powered[j - 1] * - pos_j_base_powered[j - 1] / - pos_j_base_powered[j - 2] * pos_j_base_sq; - } - } - } - } - - for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { - const Index base_offset = getBaseOffsetFromIndex(partial_index, dim); - - // get data into line_buf - const Index stride = m_strides[dim]; - if (stride == 1) { - memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar)); - } else { - Index offset = base_offset; - for (int j = 0; j < line_len; ++j, offset += stride) { - line_buf[j] = buf[offset]; - } - } - - // processs the line - if (is_power_of_two) { - processDataLineCooleyTukey(line_buf, line_len, log_len); - } - else { - processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); - } - - // write back - if (FFTDir == FFT_FORWARD && stride == 1) { - memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar)); - } else { - Index offset = base_offset; - const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0); - for (int j = 0; j < line_len; ++j, offset += stride) { - buf[offset] = (FFTDir == FFT_FORWARD) ? 
line_buf[j] : line_buf[j] * div_factor; - } - } - } - m_device.deallocate(line_buf); - if (!is_power_of_two) { - m_device.deallocate(a); - m_device.deallocate(b); - m_device.deallocate(pos_j_base_powered); - } - } - - if(!write_to_out) { - for (Index i = 0; i < m_size; ++i) { - data[i] = PartOf<FFTResultType>()(buf[i]); - } - m_device.deallocate(buf); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { - eigen_assert(x > 0); - return !(x & (x - 1)); - } - - // The composite number for padding, used in Bluestein's FFT algorithm - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { - Index i = 2; - while (i < 2 * n - 1) i *= 2; - return i; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { - Index log2m = 0; - while (m >>= 1) log2m++; - return log2m; - } - - // Call Cooley Tukey algorithm directly, data length must be power of 2 - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { - eigen_assert(isPowerOfTwo(line_len)); - scramble_FFT(line_buf, line_len); - compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len); - } - - // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { - Index n = line_len; - Index m = good_composite; - ComplexScalar* data = line_buf; - - for (Index i = 0; i < n; ++i) { - if(FFTDir == FFT_FORWARD) { - a[i] = data[i] * numext::conj(pos_j_base_powered[i]); - } - else { - a[i] = data[i] * pos_j_base_powered[i]; - } - } - for (Index i = n; i < m; ++i) { - a[i] = ComplexScalar(0, 0); - } - - for (Index i = 0; i < n; ++i) { - if(FFTDir == FFT_FORWARD) { - b[i] = pos_j_base_powered[i]; - } - else { - b[i] = numext::conj(pos_j_base_powered[i]); - } - } - for (Index i = n; i < m - n; ++i) { - b[i] = ComplexScalar(0, 0); - } - for (Index i = m - n; i < m; ++i) { - if(FFTDir == FFT_FORWARD) { - b[i] = pos_j_base_powered[m-i]; - } - else { - b[i] = numext::conj(pos_j_base_powered[m-i]); - } - } - - scramble_FFT(a, m); - compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len); - - scramble_FFT(b, m); - compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len); - - for (Index i = 0; i < m; ++i) { - a[i] *= b[i]; - } - - scramble_FFT(a, m); - compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len); - - //Do the scaling after ifft - for (Index i = 0; i < m; ++i) { - a[i] /= m; - } - - for (Index i = 0; i < n; ++i) { - if(FFTDir == FFT_FORWARD) { - data[i] = a[i] * numext::conj(pos_j_base_powered[i]); - } - else { - data[i] = a[i] * pos_j_base_powered[i]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { - eigen_assert(isPowerOfTwo(n)); - Index j = 1; - for (Index i = 1; i < n; ++i){ - if (j > i) { - std::swap(data[j-1], data[i-1]); - } - Index m = n >> 1; - while (m >= 2 && j > m) { - j -= m; - m >>= 1; - } - j += m; - } - } - - template <int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) { - ComplexScalar tmp = data[1]; - data[1] = data[0] - data[1]; - data[0] += tmp; - } - - template <int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) { - ComplexScalar tmp[4]; - tmp[0] = data[0] + data[1]; - tmp[1] = data[0] - data[1]; - 
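// The second even/odd pair is combined below; the ComplexScalar(0, -1) / (0, 1) factor
// applied to (data[2] - data[3]) is the -i (forward) or +i (inverse) twiddle factor of a
// radix-4 butterfly.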
tmp[2] = data[2] + data[3]; - if (Dir == FFT_FORWARD) { - tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); - } else { - tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); - } - data[0] = tmp[0] + tmp[2]; - data[1] = tmp[1] + tmp[3]; - data[2] = tmp[0] - tmp[2]; - data[3] = tmp[1] - tmp[3]; - } - - template <int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) { - ComplexScalar tmp_1[8]; - ComplexScalar tmp_2[8]; - - tmp_1[0] = data[0] + data[1]; - tmp_1[1] = data[0] - data[1]; - tmp_1[2] = data[2] + data[3]; - if (Dir == FFT_FORWARD) { - tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); - } else { - tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); - } - tmp_1[4] = data[4] + data[5]; - tmp_1[5] = data[4] - data[5]; - tmp_1[6] = data[6] + data[7]; - if (Dir == FFT_FORWARD) { - tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); - } else { - tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); - } - tmp_2[0] = tmp_1[0] + tmp_1[2]; - tmp_2[1] = tmp_1[1] + tmp_1[3]; - tmp_2[2] = tmp_1[0] - tmp_1[2]; - tmp_2[3] = tmp_1[1] - tmp_1[3]; - tmp_2[4] = tmp_1[4] + tmp_1[6]; -// SQRT2DIV2 = sqrt(2)/2 -#define SQRT2DIV2 0.7071067811865476 - if (Dir == FFT_FORWARD) { - tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); - tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); - tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); - } else { - tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); - tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); - tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); - } - data[0] = tmp_2[0] + tmp_2[4]; - data[1] = tmp_2[1] + tmp_2[5]; - data[2] = tmp_2[2] + tmp_2[6]; - data[3] = tmp_2[3] + tmp_2[7]; - data[4] = tmp_2[0] - tmp_2[4]; - data[5] = tmp_2[1] - tmp_2[5]; - data[6] = tmp_2[2] - tmp_2[6]; - data[7] = tmp_2[3] - tmp_2[7]; - } - - template <int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge( - ComplexScalar* data, Index n, Index n_power_of_2) { - // Original code: - // RealScalar wtemp = std::sin(M_PI/n); - // RealScalar wpi = -std::sin(2 * M_PI/n); - const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; - const RealScalar wpi = (Dir == FFT_FORWARD) - ? 
m_minus_sin_2_PI_div_n_LUT[n_power_of_2] - : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; - - const ComplexScalar wp(wtemp, wpi); - const ComplexScalar wp_one = wp + ComplexScalar(1, 0); - const ComplexScalar wp_one_2 = wp_one * wp_one; - const ComplexScalar wp_one_3 = wp_one_2 * wp_one; - const ComplexScalar wp_one_4 = wp_one_3 * wp_one; - const Index n2 = n / 2; - ComplexScalar w(1.0, 0.0); - for (Index i = 0; i < n2; i += 4) { - ComplexScalar temp0(data[i + n2] * w); - ComplexScalar temp1(data[i + 1 + n2] * w * wp_one); - ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2); - ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3); - w = w * wp_one_4; - - data[i + n2] = data[i] - temp0; - data[i] += temp0; - - data[i + 1 + n2] = data[i + 1] - temp1; - data[i + 1] += temp1; - - data[i + 2 + n2] = data[i + 2] - temp2; - data[i + 2] += temp2; - - data[i + 3 + n2] = data[i + 3] - temp3; - data[i + 3] += temp3; - } - } - - template <int Dir> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly( - ComplexScalar* data, Index n, Index n_power_of_2) { - eigen_assert(isPowerOfTwo(n)); - if (n > 8) { - compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1); - compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1); - butterfly_1D_merge<Dir>(data, n, n_power_of_2); - } else if (n == 8) { - butterfly_8<Dir>(data); - } else if (n == 4) { - butterfly_4<Dir>(data); - } else if (n == 2) { - butterfly_2<Dir>(data); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { - Index result = 0; - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > omitted_dim; --i) { - const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; - const Index idx = index / partial_m_stride; - index -= idx * partial_m_stride; - result += idx * m_strides[i]; - } - result += index; - } - else { - for (Index i = 0; i < omitted_dim; ++i) { - const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; - const Index idx = index / partial_m_stride; - index -= idx * partial_m_stride; - result += idx * m_strides[i]; - } - result += index; - } - // Value of index_coords[omitted_dim] is not determined to this step - return result; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { - Index result = base + offset * m_strides[omitted_dim] ; - return result; - } - - protected: - Index m_size; - const FFT& m_fft; - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - TensorEvaluator<ArgType, Device> m_impl; - CoeffReturnType* m_data; - const Device& m_device; - - // This will support a maximum FFT size of 2^32 for each dimension - // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; - const RealScalar m_sin_PI_div_n_LUT[32] = { - RealScalar(0.0), - RealScalar(-2), - RealScalar(-0.999999999999999), - RealScalar(-0.292893218813453), - RealScalar(-0.0761204674887130), - RealScalar(-0.0192147195967696), - RealScalar(-0.00481527332780311), - RealScalar(-0.00120454379482761), - RealScalar(-3.01181303795779e-04), - RealScalar(-7.52981608554592e-05), - RealScalar(-1.88247173988574e-05), - RealScalar(-4.70619042382852e-06), - RealScalar(-1.17654829809007e-06), - RealScalar(-2.94137117780840e-07), - RealScalar(-7.35342821488550e-08), - RealScalar(-1.83835707061916e-08), - RealScalar(-4.59589268710903e-09), - RealScalar(-1.14897317243732e-09), - RealScalar(-2.87243293150586e-10), - RealScalar( 
-7.18108232902250e-11), - RealScalar(-1.79527058227174e-11), - RealScalar(-4.48817645568941e-12), - RealScalar(-1.12204411392298e-12), - RealScalar(-2.80511028480785e-13), - RealScalar(-7.01277571201985e-14), - RealScalar(-1.75319392800498e-14), - RealScalar(-4.38298482001247e-15), - RealScalar(-1.09574620500312e-15), - RealScalar(-2.73936551250781e-16), - RealScalar(-6.84841378126949e-17), - RealScalar(-1.71210344531737e-17), - RealScalar(-4.28025861329343e-18) - }; - - // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); - const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { - RealScalar(0.0), - RealScalar(0.0), - RealScalar(-1.00000000000000e+00), - RealScalar(-7.07106781186547e-01), - RealScalar(-3.82683432365090e-01), - RealScalar(-1.95090322016128e-01), - RealScalar(-9.80171403295606e-02), - RealScalar(-4.90676743274180e-02), - RealScalar(-2.45412285229123e-02), - RealScalar(-1.22715382857199e-02), - RealScalar(-6.13588464915448e-03), - RealScalar(-3.06795676296598e-03), - RealScalar(-1.53398018628477e-03), - RealScalar(-7.66990318742704e-04), - RealScalar(-3.83495187571396e-04), - RealScalar(-1.91747597310703e-04), - RealScalar(-9.58737990959773e-05), - RealScalar(-4.79368996030669e-05), - RealScalar(-2.39684498084182e-05), - RealScalar(-1.19842249050697e-05), - RealScalar(-5.99211245264243e-06), - RealScalar(-2.99605622633466e-06), - RealScalar(-1.49802811316901e-06), - RealScalar(-7.49014056584716e-07), - RealScalar(-3.74507028292384e-07), - RealScalar(-1.87253514146195e-07), - RealScalar(-9.36267570730981e-08), - RealScalar(-4.68133785365491e-08), - RealScalar(-2.34066892682746e-08), - RealScalar(-1.17033446341373e-08), - RealScalar(-5.85167231706864e-09), - RealScalar(-2.92583615853432e-09) - }; -}; - -} // end namespace Eigen - -#endif // EIGEN_HAS_CONSTEXPR - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h deleted file mode 100644 index fcee5f6..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ /dev/null @@ -1,389 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H -#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H - -namespace Eigen { - -/** \class TensorFixedSize - * \ingroup CXX11_Tensor_Module - * - * \brief The fixed sized version of the tensor class. 
- * - * The fixed sized equivalent of - * Eigen::Tensor<float, 3> t(3, 5, 7); - * is - * Eigen::TensorFixedSize<float, Size<3,5,7>> t; - */ - -template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType> -class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > -{ - public: - typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self; - typedef TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<Self>::StorageKind StorageKind; - typedef typename internal::traits<Self>::Index Index; - typedef Scalar_ Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - - static const int Options = Options_; - - enum { - IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), - Layout = Options_ & RowMajor ? RowMajor : ColMajor, - CoordAccess = true, - RawAccess = true - }; - - typedef Dimensions_ Dimensions; - static const std::size_t NumIndices = Dimensions::count; - - protected: - TensorStorage<Scalar, Dimensions, Options> m_storage; - - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } - - // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - // work, because that uses base().coeffRef() - and we don't yet - // implement a similar class hierarchy - inline Self& base() { return *this; } - inline const Self& base() const { return *this; } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& coeff() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - eigen_internal_assert(checkIndexRange(indices)); - return m_storage.data()[linearizedIndex(indices)]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_storage.data()[index]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return m_storage.data()[0]; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const - { - if (Options&RowMajor) { - const Index index = i1 + i0 * m_storage.dimensions()[1]; - return m_storage.data()[index]; - } else { - const Index index = i0 + i1 * m_storage.dimensions()[0]; - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const - { - if (Options&RowMajor) { - const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const - { - if (Options&RowMajor) { - const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - if (Options&RowMajor) { - const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); - return m_storage.data()[index]; - } - } -#endif - - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - eigen_assert(checkIndexRange(indices)); - return coeff(indices); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return coeff(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - 
return coeff(); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const - { - // The bracket operator is only for vectors, use the parenthesis operator instead. - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeff(index); - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) - { - // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) - { - if (Options&RowMajor) { - const Index index = i1 + i0 * m_storage.dimensions()[1]; - return m_storage.data()[index]; - } else { - const Index index = i0 + i1 * m_storage.dimensions()[0]; - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) - { - if (Options&RowMajor) { - const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - if (Options&RowMajor) { - const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); - return m_storage.data()[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) - { - if (Options&RowMajor) { - const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); - return m_storage.data()[index]; - } else { - const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); - return m_storage.data()[index]; - } - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - eigen_assert(checkIndexRange(indices)); - return coeffRef(indices); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < size()); - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - return coeffRef(); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator[](Index index) - { - // The bracket operator is only for vectors, use the parenthesis operator instead - EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - return coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize() - : m_storage() - { - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) - : m_storage(other.m_storage) - { - } - -#if EIGEN_HAS_RVALUE_REFERENCES - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE TensorFixedSize(Self&& other) - : m_storage(other.m_storage) - { - } -#endif - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, ReadOnlyAccessors>& other) - { - typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, WriteAccessors>& other) - { - typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign; - Assign assign(*this, other.derived()); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other) - { - // FIXME: check that the dimensions of other match the dimensions of *this. - // Unfortunately this isn't possible yet when the rhs is an expression. - typedef TensorAssignOp<Self, const TensorFixedSize> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other) - { - // FIXME: check that the dimensions of other match the dimensions of *this. - // Unfortunately this isn't possible yet when the rhs is an expression. - typedef TensorAssignOp<Self, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE bool checkIndexRange(const array<Index, NumIndices>& /*indices*/) const - { - using internal::array_apply_and_reduce; - using internal::array_zip_and_reduce; - using internal::greater_equal_zero_op; - using internal::logical_and_op; - using internal::lesser_op; - - return true; - // check whether the indices are all >= 0 - /* array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) && - // check whether the indices fit in the dimensions - array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());*/ - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const - { - if (Options&RowMajor) { - return m_storage.dimensions().IndexOfRowMajor(indices); - } else { - return m_storage.dimensions().IndexOfColMajor(indices); - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h deleted file mode 100644 index 8bece4e..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ /dev/null @@ -1,169 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H -#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H - -namespace Eigen { - -namespace internal { -template<typename XprType, template <class> class MakePointer_> -struct traits<TensorForcedEvalOp<XprType, MakePointer_> > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename traits<XprType>::StorageKind StorageKind; - typedef typename traits<XprType>::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; - - enum { - Flags = 0 - }; - template <class T> struct MakePointer { - // Intermediate typedef to workaround MSVC issue. - typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - }; -}; - -template<typename XprType, template <class> class MakePointer_> -struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense> -{ - typedef const TensorForcedEvalOp<XprType, MakePointer_>& type; -}; - -template<typename XprType, template <class> class MakePointer_> -struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type> -{ - typedef TensorForcedEvalOp<XprType, MakePointer_> type; -}; - -} // end namespace internal - - - -// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_) - -/** \class TensorForcedEvalOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. - * - * - */ -/// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer. -/// It is added due to the fact that for our device compiler `T*` is not allowed. -/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`. -/// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_<T>` is `T*` . -/// Therefore, by adding the default value, we managed to convert the type and it does not break any -/// existing code as its default value is `T*`. 
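// Illustrative usage sketch (assuming the <unsupported/Eigen/CXX11/Tensor> entry header):
// TensorForcedEvalOp is what TensorBase::eval() wraps an expression in. It forces the
// sub-expression to be evaluated into a temporary buffer before the rest of the expression
// is processed, which helps when the intermediate would otherwise be recomputed (for
// instance under broadcasting) or when a concrete buffer is needed:
//
//   Eigen::Tensor<float, 3> t1(4, 4, 4), t2(4, 4, 4);
//   t1.setRandom(); t2.setRandom();
//   Eigen::Tensor<float, 3> r = ((t1 + t2).eval() * 0.5f).exp();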
-template<typename XprType, template <class> class MakePointer_> -class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested; - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) - : m_xpr(expr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; -}; - - -template<typename ArgType, typename Device, template <class> class MakePointer_> -struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device> -{ - typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType; - typedef typename ArgType::Scalar Scalar; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - typedef typename XprType::Index Index; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = true, - PacketAccess = (PacketSize > 1), - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = true - }; - - EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - /// op_ is used for sycl - : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) - { } - - EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - const Index numValues = internal::array_prod(m_impl.dimensions()); - m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); - // Should initialize the memory in case we're dealing with non POD types. 
- if (NumTraits<CoeffReturnType>::RequireInitialization) { - for (Index i = 0; i < numValues; ++i) { - new(m_buffer+i) CoeffReturnType(); - } - } - typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; - EvalTo evalToTmp(m_buffer, m_op); - const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value; - internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_device.deallocate(m_buffer); - m_buffer = NULL; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_buffer[index]; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; } - - /// required by sycl in order to extract the sycl accessor - const TensorEvaluator<ArgType, Device>& impl() { return m_impl; } - /// used by sycl in order to build the sycl buffer - const Device& device() const{return m_device;} - private: - TensorEvaluator<ArgType, Device> m_impl; - const ArgType m_op; - const Device& m_device; - typename MakePointer<CoeffReturnType>::Type m_buffer; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h deleted file mode 100644 index 52b803d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ /dev/null @@ -1,109 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H -#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H - -namespace Eigen { - -// MakePointer class is used as a container of the adress space of the pointer -// on the host and on the device. From the host side it generates the T* pointer -// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to -// T* m_data on the host. It is always called on the device. -// Specialisation of MakePointer class for creating the sycl buffer with -// map_allocator. 
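// Illustrative note: with the primary template declared just below, MakePointer<T>::Type is
// simply T*, which is what the evaluators use on the host; only a device-specific
// specialisation (e.g. the SYCL one mentioned above) substitutes a buffer-backed type.
//
//   static_assert(std::is_same<Eigen::MakePointer<float>::Type, float*>::value,
//                 "the default MakePointer yields a plain raw pointer");  // needs <type_traits>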
-template<typename T> struct MakePointer { - typedef T* Type; -}; - -template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap; -template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; -template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize; -template<typename PlainObjectType> class TensorRef; -template<typename Derived, int AccessLevel> class TensorBase; - -template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp; -template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp; -template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp; -template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp; -template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp; -template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp; -template<typename XprType> class TensorIndexTupleOp; -template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp; -template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; -template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; -template<typename TargetType, typename XprType> class TensorConversionOp; -template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; -template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp; -template<typename PatchDim, typename XprType> class TensorPatchOp; -template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp; -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp; -template<typename Broadcast, typename XprType> class TensorBroadcastingOp; -template<DenseIndex DimId, typename XprType> class TensorChippingOp; -template<typename NewDimensions, typename XprType> class TensorReshapingOp; -template<typename XprType> class TensorLayoutSwapOp; -template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp; -template<typename ReverseDimensions, typename XprType> class TensorReverseOp; -template<typename PaddingDimensions, typename XprType> class TensorPaddingOp; -template<typename Shuffle, typename XprType> class TensorShufflingOp; -template<typename Strides, typename XprType> class TensorStridingOp; -template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> class TensorStridingSlicingOp; -template<typename Strides, typename XprType> class TensorInflationOp; -template<typename Generator, typename XprType> class TensorGeneratorOp; -template<typename LeftXprType, typename RightXprType> class TensorAssignOp; -template<typename Op, typename XprType> class TensorScanOp; - -template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; -template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; - -template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp; -template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp; - -template<typename ExpressionType, 
typename DeviceType> class TensorDevice; -template<typename Derived, typename Device> struct TensorEvaluator; - -struct DefaultDevice; -struct ThreadPoolDevice; -struct GpuDevice; -struct SyclDevice; - -enum FFTResultType { - RealPart = 0, - ImagPart = 1, - BothParts = 2 -}; - -enum FFTDirection { - FFT_FORWARD = 0, - FFT_REVERSE = 1 -}; - - -namespace internal { - -template <typename Device, typename Expression> -struct IsVectorizable { - static const bool value = TensorEvaluator<Expression, Device>::PacketAccess; -}; - -template <typename Expression> -struct IsVectorizable<GpuDevice, Expression> { - static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && - TensorEvaluator<Expression, GpuDevice>::IsAligned; -}; - -template <typename Expression, typename Device, - bool Vectorizable = IsVectorizable<Device, Expression>::value> -class TensorExecutor; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h deleted file mode 100644 index d73f6dc..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ /dev/null @@ -1,489 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H -#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H - -namespace Eigen { -namespace internal { - - -/** \internal - * \brief Template functor to compute the modulo between an array and a scalar. - */ -template <typename Scalar> -struct scalar_mod_op { - EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } - const Scalar m_divisor; -}; -template <typename Scalar> -struct functor_traits<scalar_mod_op<Scalar> > -{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; - - -/** \internal - * \brief Template functor to compute the modulo between 2 arrays. - */ -template <typename Scalar> -struct scalar_mod2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op); - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } -}; -template <typename Scalar> -struct functor_traits<scalar_mod2_op<Scalar> > -{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; - -template <typename Scalar> -struct scalar_fmod_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op); - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar - operator()(const Scalar& a, const Scalar& b) const { - return numext::fmod(a, b); - } -}; -template <typename Scalar> -struct functor_traits<scalar_fmod_op<Scalar> > { - enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell. 
- PacketAccess = false }; -}; - - -/** \internal - * \brief Template functor to compute the sigmoid of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sigmoid() - */ -template <typename T> -struct scalar_sigmoid_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { - const T one = T(1); - return one / (one + numext::exp(-x)); - } - - template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(const Packet& x) const { - const Packet one = pset1<Packet>(T(1)); - return pdiv(one, padd(one, pexp(pnegate(x)))); - } -}; - -template <typename T> -struct functor_traits<scalar_sigmoid_op<T> > { - enum { - Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6, - PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && - packet_traits<T>::HasNegate && packet_traits<T>::HasExp - }; -}; - - -template<typename Reducer, typename Device> -struct reducer_traits { - enum { - Cost = 1, - PacketAccess = false - }; -}; - -// Standard reduction functors -template <typename T> struct SumReducer -{ - static const bool PacketAccess = packet_traits<T>::HasAdd; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - internal::scalar_sum_op<T> sum_op; - *accum = sum_op(*accum, t); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = padd<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - internal::scalar_cast_op<int, T> conv; - return conv(0); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - internal::scalar_sum_op<T> sum_op; - return sum_op(saccum, predux(vaccum)); - } -}; - -template <typename T, typename Device> -struct reducer_traits<SumReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = PacketType<T, Device>::HasAdd - }; -}; - - -template <typename T> struct MeanReducer -{ - static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger; - static const bool IsStateful = true; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - MeanReducer() : scalarCount_(0), packetCount_(0) { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - internal::scalar_sum_op<T> sum_op; - *accum = sum_op(*accum, t); - scalarCount_++; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { - (*accum) = padd<Packet>(*accum, p); - packetCount_++; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - internal::scalar_cast_op<int, T> conv; - return conv(0); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum / scalarCount_; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) 
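Each reducer above exposes the same protocol: initialize() produces the accumulator, reduce() folds one coefficient in, and finalize() post-processes the result (MeanReducer additionally counts what it has seen). TensorReductionOp drives this protocol. The sketch below assumes the usual TensorBase entry points sum() and mean() map to SumReducer and MeanReducer, and replays the scalar protocol by hand for comparison; sizes are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();

  // High-level API: sum() and mean() pick the matching reducer internally.
  Eigen::Tensor<float, 0> total = t.sum();
  Eigen::array<Eigen::DenseIndex, 1> dim0 = {{0}};
  Eigen::Tensor<float, 1> means_along_dim0 = t.mean(dim0);   // one mean per column

  // The same scalar protocol written out by hand for the full sum:
  // initialize() -> repeated reduce() -> finalize().
  Eigen::internal::SumReducer<float> reducer;
  float accum = reducer.initialize();
  for (Eigen::DenseIndex j = 0; j < t.dimension(1); ++j)
    for (Eigen::DenseIndex i = 0; i < t.dimension(0); ++i)
      reducer.reduce(t(i, j), &accum);
  float manual_total = reducer.finalize(accum);

  std::cout << total() << " == " << manual_total << "\n";
  return 0;
}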
const { - return pdiv(vaccum, pset1<Packet>(packetCount_)); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - internal::scalar_sum_op<T> sum_op; - return sum_op(saccum, predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size); - } - - protected: - DenseIndex scalarCount_; - DenseIndex packetCount_; -}; - -template <typename T, typename Device> -struct reducer_traits<MeanReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = PacketType<T, Device>::HasAdd - }; -}; - - -template <typename T, bool IsMax = true, bool IsInteger = true> -struct MinMaxBottomValue { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { - return Eigen::NumTraits<T>::lowest(); - } -}; -template <typename T> -struct MinMaxBottomValue<T, true, false> { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { - return -Eigen::NumTraits<T>::infinity(); - } -}; -template <typename T> -struct MinMaxBottomValue<T, false, true> { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { - return Eigen::NumTraits<T>::highest(); - } -}; -template <typename T> -struct MinMaxBottomValue<T, false, false> { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { - return Eigen::NumTraits<T>::infinity(); - } -}; - - -template <typename T> struct MaxReducer -{ - static const bool PacketAccess = packet_traits<T>::HasMax; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t > *accum) { *accum = t; } - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmax<Packet>(*accum, p); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value(); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::maxi(saccum, predux_max(vaccum)); - } -}; - -template <typename T, typename Device> -struct reducer_traits<MaxReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = PacketType<T, Device>::HasMax - }; -}; - - -template <typename T> struct MinReducer -{ - static const bool PacketAccess = packet_traits<T>::HasMin; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t < *accum) { *accum = t; } - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmin<Packet>(*accum, p); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value(); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template 
<typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::mini(saccum, predux_min(vaccum)); - } -}; - -template <typename T, typename Device> -struct reducer_traits<MinReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = PacketType<T, Device>::HasMin - }; -}; - - -template <typename T> struct ProdReducer -{ - static const bool PacketAccess = packet_traits<T>::HasMul; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - internal::scalar_product_op<T> prod_op; - (*accum) = prod_op(*accum, t); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmul<Packet>(*accum, p); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - internal::scalar_cast_op<int, T> conv; - return conv(1); - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(initialize()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - return accum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return vaccum; - } - template <typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - internal::scalar_product_op<T> prod_op; - return prod_op(saccum, predux_mul(vaccum)); - } -}; - -template <typename T, typename Device> -struct reducer_traits<ProdReducer<T>, Device> { - enum { - Cost = NumTraits<T>::MulCost, - PacketAccess = PacketType<T, Device>::HasMul - }; -}; - - -struct AndReducer -{ - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { - *accum = *accum && t; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { - return accum; - } -}; - -template <typename Device> -struct reducer_traits<AndReducer, Device> { - enum { - Cost = 1, - PacketAccess = false - }; -}; - - -struct OrReducer { - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { - *accum = *accum || t; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { - return false; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { - return accum; - } -}; - -template <typename Device> -struct reducer_traits<OrReducer, Device> { - enum { - Cost = 1, - PacketAccess = false - }; -}; - - -// Argmin/Argmax reducers -template <typename T> struct ArgMaxTupleReducer -{ - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t.second > accum->second) { *accum = t; } - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return T(0, NumTraits<typename T::second_type>::lowest()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { - return accum; - } -}; - -template <typename T, typename Device> -struct 
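The tuple reducers keep (index, value) pairs so the winning index survives the reduction. A hedged usage sketch, assuming the TensorBase argmax()/argmax(dim) entry points that wrap ArgMaxTupleReducer; the expected outputs assume the default column-major layout.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1.f, 7.f, 2.f},
               {5.f, 0.f, 3.f}});

  // Index of the maximum over the whole (flattened, column-major) tensor.
  Eigen::Tensor<Eigen::DenseIndex, 0> flat_argmax = t.argmax();

  // Index of the maximum along dimension 0, i.e. one result per column.
  Eigen::Tensor<Eigen::DenseIndex, 1> col_argmax = t.argmax(0);

  std::cout << flat_argmax() << "\n";           // 2: t(0,1) holds the largest value
  for (int j = 0; j < 3; ++j)
    std::cout << col_argmax(j) << " ";          // 1 0 1
  std::cout << "\n";
  return 0;
}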
reducer_traits<ArgMaxTupleReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = false - }; -}; - - -template <typename T> struct ArgMinTupleReducer -{ - static const bool PacketAccess = false; - static const bool IsStateful = false; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { - if (t.second < accum->second) { *accum = t; } - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return T(0, NumTraits<typename T::second_type>::highest()); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { - return accum; - } -}; - -template <typename T, typename Device> -struct reducer_traits<ArgMinTupleReducer<T>, Device> { - enum { - Cost = NumTraits<T>::AddCost, - PacketAccess = false - }; -}; - - -template <typename T, typename Index, size_t NumDims> -class GaussianGenerator { - public: - static const bool PacketAccess = false; - - EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means, - const array<T, NumDims>& std_devs) - : m_means(means) - { - for (size_t i = 0; i < NumDims; ++i) { - m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; - } - } - - EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const { - T tmp = T(0); - for (size_t i = 0; i < NumDims; ++i) { - T offset = coordinates[i] - m_means[i]; - tmp += offset * offset / m_two_sigmas[i]; - } - return numext::exp(-tmp); - } - - private: - array<T, NumDims> m_means; - array<T, NumDims> m_two_sigmas; -}; - -template <typename T, typename Index, size_t NumDims> -struct functor_traits<GaussianGenerator<T, Index, NumDims> > { - enum { - Cost = NumDims * (2 * NumTraits<T>::AddCost + NumTraits<T>::MulCost + - functor_traits<scalar_quotient_op<T, T> >::Cost) + - functor_traits<scalar_exp_op<T> >::Cost, - PacketAccess = GaussianGenerator<T, Index, NumDims>::PacketAccess - }; -}; - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h deleted file mode 100644 index e27753b..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ /dev/null @@ -1,185 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H -#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H - -namespace Eigen { - -/** \class TensorGeneratorOp - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor generator class. 
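GaussianGenerator above maps a coordinate vector to exp(-sum_i (x_i - mu_i)^2 / (2 sigma_i^2)), so it can drive the generator expression introduced next. A small sketch, assuming TensorBase::generate() accepts any functor with this coordinate-based operator(); grid size and parameters are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // Only the shape of 'grid' matters; its values are never read.
  Eigen::Tensor<float, 2> grid(5, 5);

  // Gaussian bump centred at (2, 2) with sigma = 1 along each dimension.
  Eigen::array<float, 2> means  = {{2.f, 2.f}};
  Eigen::array<float, 2> sigmas = {{1.f, 1.f}};
  Eigen::internal::GaussianGenerator<float, Eigen::DenseIndex, 2> gauss(means, sigmas);

  // generate() builds a TensorGeneratorOp: every output coefficient is produced
  // by calling gauss() with that coefficient's coordinates.
  Eigen::Tensor<float, 2> bump = grid.generate(gauss);

  std::cout << bump(2, 2) << " " << bump(0, 0) << "\n";  // 1 at the centre, smaller at the corner
  return 0;
}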
- * - * - */ -namespace internal { -template<typename Generator, typename XprType> -struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Generator, typename XprType> -struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense> -{ - typedef const TensorGeneratorOp<Generator, XprType>& type; -}; - -template<typename Generator, typename XprType> -struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type> -{ - typedef TensorGeneratorOp<Generator, XprType> type; -}; - -} // end namespace internal - - - -template<typename Generator, typename XprType> -class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested; - typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) - : m_xpr(expr), m_generator(generator) {} - - EIGEN_DEVICE_FUNC - const Generator& generator() const { return m_generator; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Generator m_generator; -}; - - -// Eval as rvalue -template<typename Generator, typename ArgType, typename Device> -struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device> -{ - typedef TensorGeneratorOp<Generator, ArgType> XprType; - typedef typename XprType::Index Index; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; - static const int NumDims = internal::array_size<Dimensions>::value; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - enum { - IsAligned = false, - PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_generator(op.generator()) - { - TensorEvaluator<ArgType, Device> impl(op.expression(), device); - m_dimensions = impl.dimensions(); - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_strides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& 
dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - array<Index, NumDims> coords; - extract_coordinates(index, coords); - return m_generator(coords); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool) const { - // TODO(rmlarsen): This is just a placeholder. Define interface to make - // generators return their cost. - return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() + - TensorOpCost::MulCost<Scalar>()); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void extract_coordinates(Index index, array<Index, NumDims>& coords) const { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - coords[i] = idx; - } - coords[0] = index; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - coords[i] = idx; - } - coords[NumDims-1] = index; - } - } - - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - Generator m_generator; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h deleted file mode 100644 index 665b861..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h +++ /dev/null @@ -1,33 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H -#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H - -namespace Eigen { - -/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. - * - * This function computes the regularized incomplete beta function (integral). 
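extract_coordinates() above inverts the usual strided linearisation: the coordinate with the largest stride is peeled off first by integer division and the remainder is carried down. The same arithmetic as a standalone check in plain C++; the dimensions and the flat index are chosen arbitrarily.

#include <array>
#include <cstdint>
#include <iostream>

int main() {
  // A 3 x 4 x 5 tensor laid out column-major.
  const std::array<std::int64_t, 3> dims = {{3, 4, 5}};
  std::array<std::int64_t, 3> strides;
  strides[0] = 1;
  for (int i = 1; i < 3; ++i) strides[i] = strides[i - 1] * dims[i - 1];  // {1, 3, 12}

  std::int64_t index = 47;                 // flat index to decompose
  std::array<std::int64_t, 3> coords;
  for (int i = 2; i > 0; --i) {            // highest stride first, as in extract_coordinates
    coords[i] = index / strides[i];
    index -= coords[i] * strides[i];
  }
  coords[0] = index;

  // 47 = 2*1 + 3*3 + 3*12  ->  coords = {2, 3, 3}
  std::cout << coords[0] << " " << coords[1] << " " << coords[2] << "\n";
  return 0;
}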
- * - */ -template <typename ADerived, typename BDerived, typename XDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const - TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>, - const ADerived, const BDerived, const XDerived> - betainc(const ADerived& a, const BDerived& b, const XDerived& x) { - return TensorCwiseTernaryOp< - internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived, - const BDerived, const XDerived>( - a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>()); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h deleted file mode 100644 index a901c5d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ /dev/null @@ -1,79 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H -#define EIGEN_CXX11_TENSOR_TENSOR_IO_H - -namespace Eigen { - -namespace internal { - -// Print the tensor as a 2d matrix -template <typename Tensor, int Rank> -struct TensorPrinter { - static void run (std::ostream& os, const Tensor& tensor) { - typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar; - typedef typename Tensor::Index Index; - const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size > 0) { - const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = Tensor::Layout; - Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim); - os << matrix; - } - } -}; - - -// Print the tensor as a vector -template <typename Tensor> -struct TensorPrinter<Tensor, 1> { - static void run (std::ostream& os, const Tensor& tensor) { - typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar; - typedef typename Tensor::Index Index; - const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size > 0) { - Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size); - os << array; - } - } -}; - - -// Print the tensor as a scalar -template <typename Tensor> -struct TensorPrinter<Tensor, 0> { - static void run (std::ostream& os, const Tensor& tensor) { - os << tensor.coeff(0); - } -}; -} - -template <typename T> -std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) { - typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator; - typedef typename Evaluator::Dimensions Dimensions; - - // Evaluate the expression if needed - TensorForcedEvalOp<const T> eval = expr.eval(); - Evaluator tensor(eval, DefaultDevice()); - tensor.evalSubExprsIfNeeded(NULL); - - // Print the result - static const int rank = internal::array_size<Dimensions>::value; - internal::TensorPrinter<Evaluator, rank>::run(os, tensor); - - // Cleanup. 
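Taken together, the two files removed here provide a free betainc() for tensor expressions and an operator<< that force-evaluates an expression before printing it. A short usage sketch, assuming both are available after including the Tensor module; the sample values are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> a(3), b(3), x(3);
  a.setValues({0.5f, 2.f, 4.f});
  b.setValues({0.5f, 2.f, 4.f});
  x.setValues({0.1f, 0.5f, 0.9f});

  // Element-wise regularized incomplete beta function I_x(a, b).
  Eigen::Tensor<float, 1> r = Eigen::betainc(a, b, x);

  // operator<< (TensorIO.h) force-evaluates the expression and prints
  // rank-1 tensors as a column vector.
  std::cout << r << std::endl;
  return 0;
}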
- tensor.cleanup(); - return os; -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h deleted file mode 100644 index 566856e..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ /dev/null @@ -1,509 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H - -namespace Eigen { - -/** \class TensorImagePatch - * \ingroup CXX11_Tensor_Module - * - * \brief Patch extraction specialized for image processing. - * This assumes that the input has at least 3 dimensions ordered as follows: - * 1st dimension: channels (of size d) - * 2nd dimension: rows (of size r) - * 3rd dimension: columns (of size c) - * There can be additional dimensions such as time (for video) or batch (for - * bulk processing) after the first 3. - * Calling the image patch code with patch_rows and patch_cols is equivalent - * to calling the regular patch extraction code with parameters d, patch_rows, - * patch_cols, and 1 for all the additional dimensions. - */ -namespace internal { -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType> -{ - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense> -{ - typedef const TensorImagePatchOp<Rows, Cols, XprType>& type; -}; - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type> -{ - typedef TensorImagePatchOp<Rows, Cols, XprType> type; -}; - -} // end namespace internal - -template<DenseIndex Rows, DenseIndex Cols, typename XprType> -class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorImagePatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
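TensorImagePatchOp is normally built through TensorBase::extract_image_patches() rather than constructed directly. A hedged sketch of that entry point, assuming the two-argument overload whose strides default to 1 and whose padding type defaults to PADDING_SAME; the image and patch sizes are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // One channel, 5 rows, 4 columns (ColMajor order: {depth, rows, cols}).
  Eigen::Tensor<float, 3> img(1, 5, 4);
  img.setRandom();

  // Every overlapping 3x3 patch; with unit strides and SAME padding there is
  // one patch per input position.
  // Result layout (ColMajor): {depth, patch_rows, patch_cols, number_of_patches}.
  Eigen::Tensor<float, 4> patches = img.extract_image_patches(3, 3);

  std::cout << patches.dimension(0) << " "    // 1
            << patches.dimension(1) << " "    // 3
            << patches.dimension(2) << " "    // 3
            << patches.dimension(3) << "\n";  // 20 = 5 * 4
  return 0;
}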
- PaddingType padding_type, Scalar padding_value) - : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_row_strides(row_strides), m_col_strides(col_strides), - m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), - m_padding_type(padding_type), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - DenseIndex padding_top, DenseIndex padding_bottom, - DenseIndex padding_left, DenseIndex padding_right, - Scalar padding_value) - : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_row_strides(row_strides), m_col_strides(col_strides), - m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), - m_padding_left(padding_left), m_padding_right(padding_right), - m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - DenseIndex patch_rows() const { return m_patch_rows; } - EIGEN_DEVICE_FUNC - DenseIndex patch_cols() const { return m_patch_cols; } - EIGEN_DEVICE_FUNC - DenseIndex row_strides() const { return m_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_strides() const { return m_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_row_strides() const { return m_in_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_col_strides() const { return m_in_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } - EIGEN_DEVICE_FUNC - bool padding_explicit() const { return m_padding_explicit; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top() const { return m_padding_top; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom() const { return m_padding_bottom; } - EIGEN_DEVICE_FUNC - DenseIndex padding_left() const { return m_padding_left; } - EIGEN_DEVICE_FUNC - DenseIndex padding_right() const { return m_padding_right; } - EIGEN_DEVICE_FUNC - PaddingType padding_type() const { return m_padding_type; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const DenseIndex m_patch_rows; - const DenseIndex m_patch_cols; - const DenseIndex m_row_strides; - const DenseIndex m_col_strides; - const DenseIndex m_in_row_strides; - const DenseIndex m_in_col_strides; - const DenseIndex m_row_inflate_strides; - const DenseIndex m_col_inflate_strides; - const bool m_padding_explicit; - const DenseIndex m_padding_top; - const DenseIndex m_padding_bottom; - const DenseIndex m_padding_left; - const DenseIndex m_padding_right; - const PaddingType m_padding_type; - const Scalar m_padding_value; -}; - -// Eval as rvalue -template<DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device> -struct TensorEvaluator<const 
TensorImagePatchOp<Rows, Cols, ArgType>, Device> -{ - typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, - Device> Self; - typedef TensorEvaluator<ArgType, Device> Impl; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); - - m_paddingValue = op.padding_value(); - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - - // Caches a few variables. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputDepth = input_dims[0]; - m_inputRows = input_dims[1]; - m_inputCols = input_dims[2]; - } else { - m_inputDepth = input_dims[NumInputDims-1]; - m_inputRows = input_dims[NumInputDims-2]; - m_inputCols = input_dims[NumInputDims-3]; - } - - m_row_strides = op.row_strides(); - m_col_strides = op.col_strides(); - - // Input strides and effective input/patch size - m_in_row_strides = op.in_row_strides(); - m_in_col_strides = op.in_col_strides(); - m_row_inflate_strides = op.row_inflate_strides(); - m_col_inflate_strides = op.col_inflate_strides(); - // The "effective" input rows and input cols are the input rows and cols - // after inflating them with zeros. - // For example, a 2x3 matrix with row_inflate_strides and - // col_inflate_strides of 2 goes from: - // A B C - // D E F - // - // to a 3 x 5 matrix: - // - // A . B . C - // . . . . . - // D . E .
F - - m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; - m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; - m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); - m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); - - if (op.padding_explicit()) { - m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_rowPaddingTop = op.padding_top(); - m_colPaddingLeft = op.padding_left(); - } else { - // Computing padding from the type - switch (op.padding_type()) { - case PADDING_VALID: - m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - // Calculate the padding - m_rowPaddingTop = numext::maxi<Index>(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2); - m_colPaddingLeft = numext::maxi<Index>(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2); - break; - case PADDING_SAME: - m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); - // Calculate the padding - m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; - m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; - break; - default: - eigen_assert(false && "unexpected padding"); - } - } - eigen_assert(m_outputRows > 0); - eigen_assert(m_outputCols > 0); - - // Dimensions for result of extraction. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // ColMajor - // 0: depth - // 1: patch_rows - // 2: patch_cols - // 3: number of patches - // 4 and beyond: anything else (such as batch). - m_dimensions[0] = input_dims[0]; - m_dimensions[1] = op.patch_rows(); - m_dimensions[2] = op.patch_cols(); - m_dimensions[3] = m_outputRows * m_outputCols; - for (int i = 4; i < NumDims; ++i) { - m_dimensions[i] = input_dims[i-1]; - } - } else { - // RowMajor - // NumDims-1: depth - // NumDims-2: patch_rows - // NumDims-3: patch_cols - // NumDims-4: number of patches - // NumDims-5 and beyond: anything else (such as batch). - m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; - m_dimensions[NumDims-2] = op.patch_rows(); - m_dimensions[NumDims-3] = op.patch_cols(); - m_dimensions[NumDims-4] = m_outputRows * m_outputCols; - for (int i = NumDims-5; i >= 0; --i) { - m_dimensions[i] = input_dims[i]; - } - } - - // Strides for moving the patch in various dimensions. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_colStride = m_dimensions[1]; - m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; - m_otherStride = m_patchStride * m_dimensions[3]; - } else { - m_colStride = m_dimensions[NumDims-2]; - m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; - m_otherStride = m_patchStride * m_dimensions[NumDims-4]; - } - - // Strides for navigating through the input tensor. 
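The two branches above implement the usual convolution output-size formulas: VALID keeps only fully contained patches, ceil((in - patch + 1) / stride), while SAME produces one patch per stride step over the whole input, ceil(in / stride), with the top/left padding chosen to centre the patches. A small numeric check of those formulas in plain C++; the values are arbitrary.

#include <cmath>
#include <iostream>

int main() {
  const int in_rows = 10, patch_rows = 3, row_stride = 2;

  // PADDING_VALID: only patches that fit entirely inside the input.
  const int valid_rows =
      static_cast<int>(std::ceil((in_rows - patch_rows + 1) / static_cast<float>(row_stride)));  // ceil(8/2)  = 4

  // PADDING_SAME: one patch per stride step over the whole input, borders zero-padded.
  const int same_rows =
      static_cast<int>(std::ceil(in_rows / static_cast<float>(row_stride)));                     // ceil(10/2) = 5

  // Top padding for the SAME case, as computed in the evaluator above.
  const int padding_top = ((same_rows - 1) * row_stride + patch_rows - in_rows) / 2;             // (8+3-10)/2 = 0 (integer division)

  std::cout << valid_rows << " " << same_rows << " " << padding_top << "\n";  // prints: 4 5 0
  return 0;
}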
- m_rowInputStride = m_inputDepth; - m_colInputStride = m_inputDepth * m_inputRows; - m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; - - // Fast representations of different variables. - m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); - m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); - m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); - m_fastInflateRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); - m_fastInflateColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); - m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); - - // Number of patches in the width dimension. - m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); - } else { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Patch index corresponding to the passed in index. - const Index patchIndex = index / m_fastPatchStride; - // Find the offset of the element wrt the location of the first element. - const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; - - // Other ways to index this element. - const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; - const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; - - // Calculate col index in the input original tensor. - const Index colIndex = patch2DIndex / m_fastOutputRows; - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0); - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate row index in the original input tensor. - const Index rowIndex = patch2DIndex - colIndex * m_outputRows; - const Index rowOffset = patchOffset - colOffset * m_colStride; - const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; - const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - - const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { - return packetWithPossibleZero(index); - } - - const Index indices[2] = {index, index + PacketSize - 1}; - const Index patchIndex = indices[0] / m_fastPatchStride; - if (patchIndex != indices[1] / m_fastPatchStride) { - return packetWithPossibleZero(index); - } - const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride; - eigen_assert(otherIndex == indices[1] / m_fastOtherStride); - - // Find the offset of the element wrt the location of the first element. - const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, - (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; - - const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; - eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); - - const Index colIndex = patch2DIndex / m_fastOutputRows; - const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; - - // Calculate col indices in the original input tensor. - const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - - m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; - if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputCols[0] == inputCols[1]) { - const Index rowIndex = patch2DIndex - colIndex * m_outputRows; - const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; - eigen_assert(rowOffsets[0] <= rowOffsets[1]); - // Calculate col indices in the original input tensor. - const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - - m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; - - if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { - // no padding - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; - return m_impl.template packet<Unaligned>(inputIndex); - } - } - - return packetWithPossibleZero(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - Index rowPaddingTop() const { return m_rowPaddingTop; } - Index colPaddingLeft() const { return m_colPaddingLeft; } - Index outputRows() const { return m_outputRows; } - Index outputCols() const { return m_outputCols; } - Index userRowStride() const { return m_row_strides; } - Index userColStride() const { return m_col_strides; } - Index userInRowStride() const { return m_in_row_strides; } - Index userInColStride() const { return m_in_col_strides; } - Index rowInflateStride() const { return m_row_inflate_strides; } - Index colInflateStride() const { return m_col_inflate_strides; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - // We conservatively estimate the cost for the code path where the computed - // index is inside the original image and - // TensorEvaluator<ArgType, Device>::CoordAccess is false. - const double compute_cost = 3 * TensorOpCost::DivCost<Index>() + - 6 * TensorOpCost::MulCost<Index>() + - 8 * TensorOpCost::MulCost<Index>(); - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - - Index m_otherStride; - Index m_patchStride; - Index m_colStride; - Index m_row_strides; - Index m_col_strides; - - Index m_in_row_strides; - Index m_in_col_strides; - Index m_row_inflate_strides; - Index m_col_inflate_strides; - - Index m_input_rows_eff; - Index m_input_cols_eff; - Index m_patch_rows_eff; - Index m_patch_cols_eff; - - internal::TensorIntDivisor<Index> m_fastOtherStride; - internal::TensorIntDivisor<Index> m_fastPatchStride; - internal::TensorIntDivisor<Index> m_fastColStride; - internal::TensorIntDivisor<Index> m_fastInflateRowStride; - internal::TensorIntDivisor<Index> m_fastInflateColStride; - internal::TensorIntDivisor<Index> m_fastInputColsEff; - - Index m_rowInputStride; - Index m_colInputStride; - Index m_patchInputStride; - - Index m_inputDepth; - Index m_inputRows; - Index m_inputCols; - - Index m_outputRows; - Index m_outputCols; - - Index m_rowPaddingTop; - Index m_colPaddingLeft; - - internal::TensorIntDivisor<Index> m_fastOutputRows; - internal::TensorIntDivisor<Index> m_fastOutputDepth; - - Scalar m_paddingValue; - - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h deleted file mode 100644 index 3209fec..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ /dev/null @@ -1,725 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// 
for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H -#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H - - -#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES - -#define EIGEN_HAS_INDEX_LIST - -namespace Eigen { - -/** \internal - * - * \class TensorIndexList - * \ingroup CXX11_Tensor_Module - * - * \brief Set of classes used to encode a set of Tensor dimensions/indices. - * - * The indices in the list can be known at compile time or at runtime. A mix - * of static and dynamic indices can also be provided if needed. The tensor - * code will attempt to take advantage of the indices that are known at - * compile time to optimize the code it generates. - * - * This functionality requires a c++11 compliant compiler. If your compiler - * is older you need to use arrays of indices instead. - * - * Several examples are provided in the cxx11_tensor_index_list.cpp file. - * - * \sa Tensor - */ - -template <DenseIndex n> -struct type2index { - static const DenseIndex value = n; - EIGEN_DEVICE_FUNC constexpr operator DenseIndex() const { return n; } - EIGEN_DEVICE_FUNC void set(DenseIndex val) { - eigen_assert(val == n); - } -}; - -// This can be used with IndexPairList to get compile-time constant pairs, -// such as IndexPairList<type2indexpair<1,2>, type2indexpair<3,4>>(). -template <DenseIndex f, DenseIndex s> -struct type2indexpair { - static const DenseIndex first = f; - static const DenseIndex second = s; - - constexpr EIGEN_DEVICE_FUNC operator IndexPair<DenseIndex>() const { - return IndexPair<DenseIndex>(f, s); - } - - EIGEN_DEVICE_FUNC void set(const IndexPair<DenseIndex>& val) { - eigen_assert(val.first == f); - eigen_assert(val.second == s); - } -}; - - -template<DenseIndex n> struct NumTraits<type2index<n> > -{ - typedef DenseIndex Real; - enum { - IsComplex = 0, - RequireInitialization = false, - ReadCost = 1, - AddCost = 1, - MulCost = 1 - }; - - EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; } - EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; } - EIGEN_DEVICE_FUNC static inline Real highest() { return n; } - EIGEN_DEVICE_FUNC static inline Real lowest() { return n; } -}; - -namespace internal { -template <typename T> -EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { - val = new_val; -} -template <DenseIndex n> -EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) { - val.set(new_val); -} - -template <typename T> -EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair<DenseIndex> new_val) { - val = new_val; -} -template <DenseIndex f, DenseIndex s> -EIGEN_DEVICE_FUNC void update_value(type2indexpair<f, s>& val, IndexPair<DenseIndex> new_val) { - val.set(new_val); -} - - -template <typename T> -struct is_compile_time_constant { - static constexpr bool value = false; -}; - -template <DenseIndex idx> -struct is_compile_time_constant<type2index<idx> > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<const type2index<idx> > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<type2index<idx>& > { - static constexpr bool value = true; -}; -template <DenseIndex idx> -struct is_compile_time_constant<const type2index<idx>& > 
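type2index entries let some or all reduction/shuffle indices be carried in the type, so the evaluators can specialize on them instead of reading runtime values. A sketch of the intended usage, assuming a C++11 compiler (so EIGEN_HAS_INDEX_LIST is defined) and the TensorBase::sum(dims) overload that accepts an IndexList; tensor sizes are arbitrary.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 3> t(4, 5, 6);
  t.setRandom();

  // Both reduction dimensions encoded in the type: the reduction can be
  // specialized on them at compile time.
  Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<2> > static_dims;
  Eigen::Tensor<float, 1> r1 = t.sum(static_dims);   // reduces dims 0 and 2

  // Mixed form: the first entry is static, the second is set at runtime.
  Eigen::IndexList<Eigen::type2index<0>, int> mixed_dims;
  mixed_dims.set(1, 2);
  Eigen::Tensor<float, 1> r2 = t.sum(mixed_dims);

  std::cout << r1.dimension(0) << " " << r2.dimension(0) << "\n";  // 5 5
  return 0;
}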
{ - static constexpr bool value = true; -}; - -template <DenseIndex f, DenseIndex s> -struct is_compile_time_constant<type2indexpair<f, s> > { - static constexpr bool value = true; -}; -template <DenseIndex f, DenseIndex s> -struct is_compile_time_constant<const type2indexpair<f, s> > { - static constexpr bool value = true; -}; -template <DenseIndex f, DenseIndex s> -struct is_compile_time_constant<type2indexpair<f, s>& > { - static constexpr bool value = true; -}; -template <DenseIndex f, DenseIndex s> -struct is_compile_time_constant<const type2indexpair<f, s>& > { - static constexpr bool value = true; -}; - - -template<typename... T> -struct IndexTuple; - -template<typename T, typename... O> -struct IndexTuple<T, O...> { - EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { } - EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) { } - - constexpr static int count = 1 + sizeof...(O); - T head; - IndexTuple<O...> others; - typedef T Head; - typedef IndexTuple<O...> Other; -}; - -template<typename T> - struct IndexTuple<T> { - EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { } - EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { } - - constexpr static int count = 1; - T head; - typedef T Head; -}; - - -template<int N, typename... T> -struct IndexTupleExtractor; - -template<int N, typename T, typename... O> -struct IndexTupleExtractor<N, T, O...> { - - typedef typename IndexTupleExtractor<N-1, O...>::ValType ValType; - - EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) { - return IndexTupleExtractor<N-1, O...>::get_val(val.others); - } - - EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) { - return IndexTupleExtractor<N-1, O...>::get_val(val.others); - } - template <typename V> - EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) { - IndexTupleExtractor<N-1, O...>::set_val(val.others, new_val); - } - -}; - -template<typename T, typename... O> - struct IndexTupleExtractor<0, T, O...> { - - typedef T ValType; - - EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) { - return val.head; - } - EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) { - return val.head; - } - template <typename V> - EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) { - val.head = new_val; - } -}; - - - -template <int N, typename T, typename... O> -EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor<N, T, O...>::ValType& array_get(IndexTuple<T, O...>& tuple) { - return IndexTupleExtractor<N, T, O...>::get_val(tuple); -} -template <int N, typename T, typename... O> -EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor<N, T, O...>::ValType& array_get(const IndexTuple<T, O...>& tuple) { - return IndexTupleExtractor<N, T, O...>::get_val(tuple); -} -template <typename T, typename... O> - struct array_size<IndexTuple<T, O...> > { - static const size_t value = IndexTuple<T, O...>::count; -}; -template <typename T, typename... O> - struct array_size<const IndexTuple<T, O...> > { - static const size_t value = IndexTuple<T, O...>::count; -}; - - - - -template <DenseIndex Idx, typename ValueT> -struct tuple_coeff { - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple<T...>& t) { - // return array_get<Idx>(t) * (i == Idx) + tuple_coeff<Idx-1>::get(i, t) * (i != Idx); - return (i == Idx ? 
array_get<Idx>(t) : tuple_coeff<Idx-1, ValueT>::get(i, t)); - } - template <typename... T> - EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT& value) { - if (i == Idx) { - update_value(array_get<Idx>(t), value); - } else { - tuple_coeff<Idx-1, ValueT>::set(i, t, value); - } - } - - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>& t) { - return ((i == Idx) & is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) || - tuple_coeff<Idx-1, ValueT>::value_known_statically(i, t); - } - - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>& t) { - return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && - tuple_coeff<Idx-1, ValueT>::values_up_to_known_statically(t); - } - - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>& t) { - return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && - is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value && - array_get<Idx>(t) > array_get<Idx-1>(t) && - tuple_coeff<Idx-1, ValueT>::values_up_to_statically_known_to_increase(t); - } -}; - -template <typename ValueT> -struct tuple_coeff<0, ValueT> { - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple<T...>& t) { - // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr - return array_get<0>(t)/* * (i == 0)*/; - } - template <typename... T> - EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT value) { - eigen_assert (i == 0); - update_value(array_get<0>(t), value); - } - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>&) { - return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value & (i == 0); - } - - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>&) { - return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value; - } - - template <typename... T> - EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>&) { - return true; - } -}; -} // namespace internal - - - -template<typename FirstType, typename... 
OtherTypes> -struct IndexList : internal::IndexTuple<FirstType, OtherTypes...> { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this); - } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this); - } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::set(i, *this, value); - } - - EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { } - EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple<FirstType, OtherTypes...>(first, other...) { } - EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple<FirstType, OtherTypes...>() { } - - EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this); - } - EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_known_statically(*this); - } - - EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this); - } -}; - - -template<typename FirstType, typename... OtherTypes> -constexpr IndexList<FirstType, OtherTypes...> make_index_list(FirstType val1, OtherTypes... other_vals) { - return IndexList<FirstType, OtherTypes...>(val1, other_vals...); -} - - -template<typename FirstType, typename... OtherTypes> -struct IndexPairList : internal::IndexTuple<FirstType, OtherTypes...> { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair<DenseIndex> operator[] (const DenseIndex i) const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, IndexPair<DenseIndex>>::get(i, *this); - } - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair<DenseIndex> value) { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...>>::value-1, IndexPair<DenseIndex> >::set(i, *this, value); - } - - EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { } - EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple<FirstType, OtherTypes...>() { } - - EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { - return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this); - } -}; - -namespace internal { - -template<typename FirstType, typename... 
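IndexList also answers the compile-time queries used throughout the evaluators: whether a given entry is statically known, whether all of them are, and whether the entries are known to increase. A small sketch exercising those queries directly, assuming the same C++11 setup as above; the mixed list shows how the queries degrade when an entry is only known at runtime.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  // Fully static list: both entries encoded in the type via type2index.
  Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > dims;
  std::cout << dims.all_values_known_statically() << " "         // 1
            << dims.values_statically_known_to_increase() << " " // 1 (1 < 3)
            << dims[1] << "\n";                                   // 3

  // Mixed list: the second entry only exists at runtime.
  Eigen::IndexList<Eigen::type2index<1>, int> mixed;
  mixed.set(1, 7);
  std::cout << mixed.value_known_statically(0) << " "  // 1
            << mixed.value_known_statically(1) << " "  // 0
            << mixed[1] << "\n";                       // 7
  return 0;
}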
OtherTypes> size_t array_prod(const IndexList<FirstType, OtherTypes...>& sizes) { - size_t result = 1; - for (int i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) { - result *= sizes[i]; - } - return result; -} - -template<typename FirstType, typename... OtherTypes> struct array_size<IndexList<FirstType, OtherTypes...> > { - static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value; -}; -template<typename FirstType, typename... OtherTypes> struct array_size<const IndexList<FirstType, OtherTypes...> > { - static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value; -}; - -template<typename FirstType, typename... OtherTypes> struct array_size<IndexPairList<FirstType, OtherTypes...> > { - static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; -}; -template<typename FirstType, typename... OtherTypes> struct array_size<const IndexPairList<FirstType, OtherTypes...> > { - static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value; -}; - -template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList<FirstType, OtherTypes...>& a) { - return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a); -} -template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(const IndexList<FirstType, OtherTypes...>& a) { - return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a); -} - -template <typename T> -struct index_known_statically_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_known_statically_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_known_statically_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i); - } -}; - - -template <typename T> -struct all_indices_known_statically_impl { - static constexpr bool run() { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct all_indices_known_statically_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct all_indices_known_statically_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return IndexList<FirstType, OtherTypes...>().all_values_known_statically(); - } -}; - - -template <typename T> -struct indices_statically_known_to_increase_impl { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> - struct indices_statically_known_to_increase_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); - } -}; - -template <typename FirstType, typename... 
OtherTypes> - struct indices_statically_known_to_increase_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run() { - return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase(); - } -}; - - -template <typename Tx> -struct index_statically_eq_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) == value); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_eq_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) == value); - } -}; - - -template <typename T> -struct index_statically_ne_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) != value); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_ne_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) != value); - } -}; - - -template <typename T> -struct index_statically_gt_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) > value); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_gt_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) > value); - } -}; - - - -template <typename T> -struct index_statically_lt_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) < value); - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_statically_lt_impl<const IndexList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexList<FirstType, OtherTypes...>().get(i) < value); - } -}; - - - -template <typename Tx> -struct index_pair_first_statically_eq_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value); - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_pair_first_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value); - } -}; - - - -template <typename Tx> -struct index_pair_second_statically_eq_impl { - EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename FirstType, typename... OtherTypes> -struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value); - } -}; - -template <typename FirstType, typename... 
OtherTypes> -struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > { - EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { - return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) & - (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value); - } -}; - - -} // end namespace internal -} // end namespace Eigen - -#else - -namespace Eigen { -namespace internal { - -template <typename T> -struct index_known_statically_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { - return false; - } -}; - -template <typename T> -struct all_indices_known_statically_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { - return false; - } -}; - -template <typename T> -struct indices_statically_known_to_increase_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { - return false; - } -}; - -template <typename T> -struct index_statically_eq_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename T> -struct index_statically_ne_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename T> -struct index_statically_gt_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename T> -struct index_statically_lt_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename Tx> -struct index_pair_first_statically_eq_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - -template <typename Tx> -struct index_pair_second_statically_eq_impl { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { - return false; - } -}; - - - -} // end namespace internal -} // end namespace Eigen - -#endif - - -namespace Eigen { -namespace internal { -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { - return index_known_statically_impl<T>::run(i); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { - return all_indices_known_statically_impl<T>::run(); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { - return indices_statically_known_to_increase_impl<T>::run(); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { - return index_statically_eq_impl<T>::run(i, value); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { - return index_statically_ne_impl<T>::run(i, value); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { - return index_statically_gt_impl<T>::run(i, value); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { - return index_statically_lt_impl<T>::run(i, value); -} - -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) { - return index_pair_first_statically_eq_impl<T>::run(i, value); -} - -template <typename T> -static 
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) { - return index_pair_second_statically_eq_impl<T>::run(i, value); -} - -} // end namespace internal -} // end namespace Eigen - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h deleted file mode 100644 index f391fb9..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h +++ /dev/null @@ -1,229 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Ke Yang <yangke@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H -#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H - -namespace Eigen { - -/** \class TensorInflation - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor inflation class. - * - * - */ -namespace internal { -template<typename Strides, typename XprType> -struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Strides, typename XprType> -struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense> -{ - typedef const TensorInflationOp<Strides, XprType>& type; -}; - -template<typename Strides, typename XprType> -struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type> -{ - typedef TensorInflationOp<Strides, XprType> type; -}; - -} // end namespace internal - -template<typename Strides, typename XprType> -class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested; - typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) - : m_xpr(expr), m_strides(strides) {} - - EIGEN_DEVICE_FUNC - const Strides& strides() const { return m_strides; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const Strides m_strides; -}; - -// Eval as rvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device> -{ - typedef TensorInflationOp<Strides, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, 
NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_strides(op.strides()) - { - m_dimensions = m_impl.dimensions(); - // Expand each dimension to the inflated dimension. - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; - } - - // Remember the strides for fast division. - for (int i = 0; i < NumDims; ++i) { - m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - } - } else { // RowMajor - m_outputStrides[NumDims-1] = 1; - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - // Computes the input index given the output index. Returns true if the output - // index doesn't fall into a hole. 
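For a concrete picture of the hole detection that getInputIndex performs below, here is a minimal standalone 1-D sketch of inflation (illustrative only, not Eigen code): a dimension of size n grows to (n - 1) * stride + 1, the original samples sit at multiples of the stride, and every other output coefficient is a zero that never maps back to an input index.

#include <cassert>
#include <cstddef>
#include <vector>

// 1-D inflation: keep the original samples at multiples of `stride` and fill
// the holes in between with zeros. The output size is (n - 1) * stride + 1,
// matching the dimension update in the evaluator's constructor above.
std::vector<float> inflate1d(const std::vector<float>& in, int stride) {
  assert(!in.empty() && stride >= 1);
  std::vector<float> out((in.size() - 1) * stride + 1, 0.0f);
  for (std::size_t i = 0; i < in.size(); ++i) {
    out[i * stride] = in[i];  // only these positions map back to an input index
  }
  return out;
}

int main() {
  const std::vector<float> v = {1.f, 2.f, 3.f};
  const std::vector<float> w = inflate1d(v, 3);  // {1, 0, 0, 2, 0, 0, 3}
  assert(w.size() == 7);
  assert(w[0] == 1.f && w[3] == 2.f && w[6] == 3.f);
  assert(w[1] == 0.f && w[2] == 0.f);  // holes
  return 0;
}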
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const - { - eigen_assert(index < dimensions().TotalSize()); - *inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (idx != idx / m_fastStrides[i] * m_strides[i]) { - return false; - } - *inputIndex += idx / m_strides[i] * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (index != index / m_fastStrides[0] * m_strides[0]) { - return false; - } - *inputIndex += index / m_strides[0]; - return true; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - if (idx != idx / m_fastStrides[i] * m_strides[i]) { - return false; - } - *inputIndex += idx / m_strides[i] * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { - return false; - } - *inputIndex += index / m_strides[NumDims - 1]; - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - Index inputIndex = 0; - if (getInputIndex(index, &inputIndex)) { - return m_impl.coeff(inputIndex); - } else { - return Scalar(0); - } - } - - // TODO(yangke): optimize this function so that we can detect and produce - // all-zero packets - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() + - 3 * TensorOpCost::MulCost<Index>() + - 2 * TensorOpCost::AddCost<Index>()); - const double input_size = m_impl.dimensions().TotalSize(); - const double output_size = m_dimensions.TotalSize(); - if (output_size == 0) - return TensorOpCost(); - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0, - compute_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const Strides m_strides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h deleted file mode 100644 index 33edc49..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h +++ /dev/null @@ -1,82 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H -#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H - -#if EIGEN_HAS_VARIADIC_TEMPLATES - -#include <initializer_list> - -namespace Eigen { - -/** \class TensorInitializer - * \ingroup CXX11_Tensor_Module - * - * \brief Helper template to initialize Tensors from std::initializer_lists. - */ -namespace internal { - -template <typename Derived, int N> -struct Initializer { - typedef std::initializer_list< - typename Initializer<Derived, N - 1>::InitList> InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, - const InitList& vals) { - int i = 0; - for (auto v : vals) { - (*indices)[traits<Derived>::NumDimensions - N] = i++; - Initializer<Derived, N - 1>::run(tensor, indices, v); - } - } -}; - -template <typename Derived> -struct Initializer<Derived, 1> { - typedef std::initializer_list<typename traits<Derived>::Scalar> InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices, - const InitList& vals) { - int i = 0; - // There is likely a faster way to do that than iterating. - for (auto v : vals) { - (*indices)[traits<Derived>::NumDimensions - 1] = i++; - tensor.coeffRef(*indices) = v; - } - } -}; - -template <typename Derived> -struct Initializer<Derived, 0> { - typedef typename traits<Derived>::Scalar InitList; - - static void run(TensorEvaluator<Derived, DefaultDevice>& tensor, - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>*, - const InitList& v) { - tensor.coeffRef(0) = v; - } -}; - - -template <typename Derived, int N> -void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor, - const typename Initializer<Derived, traits<Derived>::NumDimensions>::InitList& vals) { - Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions> indices; - Initializer<Derived, traits<Derived>::NumDimensions>::run(tensor, &indices, vals); -} - -} // namespace internal -} // namespace Eigen - -#endif // EIGEN_HAS_VARIADIC_TEMPLATES - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h deleted file mode 100644 index ede3939..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ /dev/null @@ -1,253 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H -#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H - - -namespace Eigen { - -/** \internal - * - * \class TensorIntDiv - * \ingroup CXX11_Tensor_Module - * - * \brief Fast integer division by a constant. - * - * See the paper from Granlund and Montgomery for explanation. 
- * (at http://dx.doi.org/10.1145/773473.178249) - * - * \sa Tensor - */ - -namespace internal { - -namespace { - - // Note: result is undefined if val == 0 - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val) - { -#ifdef __CUDA_ARCH__ - return __clz(val); -#elif EIGEN_COMP_MSVC - unsigned long index; - _BitScanReverse(&index, val); - return 31 - index; -#else - EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); - return __builtin_clz(static_cast<uint32_t>(val)); -#endif - } - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val) - { -#ifdef __CUDA_ARCH__ - return __clzll(val); -#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 - unsigned long index; - _BitScanReverse64(&index, val); - return 63 - index; -#elif EIGEN_COMP_MSVC - // MSVC's _BitScanReverse64 is not available for 32bits builds. - unsigned int lo = (unsigned int)(val&0xffffffff); - unsigned int hi = (unsigned int)((val>>32)&0xffffffff); - int n; - if(hi==0) - n = 32 + count_leading_zeros<unsigned int>(lo); - else - n = count_leading_zeros<unsigned int>(hi); - return n; -#else - EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); - return __builtin_clzll(static_cast<uint64_t>(val)); -#endif - } - - template <typename T> - struct UnsignedTraits { - typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type; - }; - - template <typename T> - struct DividerTraits { - typedef typename UnsignedTraits<T>::type type; - static const int N = sizeof(T) * 8; - }; - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { -#if defined(__CUDA_ARCH__) - return __umulhi(a, b); -#else - return (static_cast<uint64_t>(a) * b) >> 32; -#endif - } - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { -#if defined(__CUDA_ARCH__) - return __umul64hi(a, b); -#elif defined(__SIZEOF_INT128__) - __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); - return static_cast<uint64_t>(v >> 64); -#else - return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper(); -#endif - } - - template <int N, typename T> - struct DividerHelper { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { - EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); - return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1); - } - }; - - template <typename T> - struct DividerHelper<64, T> { - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { -#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) - return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); -#else - const uint64_t shift = 1ULL << log_div; - TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - - TensorUInt128<static_val<1>, static_val<0> >(1, 0) - + TensorUInt128<static_val<0>, static_val<1> >(1); - return static_cast<uint64_t>(result); -#endif - } - }; -} - - -template <typename T, bool div_gt_one = false> 
-struct TensorIntDivisor { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - multiplier = 0; - shift1 = 0; - shift2 = 0; - } - - // Must have 0 < divider < 2^31. This is relaxed to - // 0 < divider < 2^63 when using 64-bit indices on platforms that support - // the __uint128_t type. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { - const int N = DividerTraits<T>::N; - eigen_assert(static_cast<typename UnsignedTraits<T>::type>(divider) < NumTraits<UnsignedType>::highest()/2); - eigen_assert(divider > 0); - - // fast ln2 - const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider)); - int log_div = N - leading_zeros; - // if divider is a power of two then log_div is 1 more than it should be. - if ((static_cast<typename UnsignedTraits<T>::type>(1) << (log_div-1)) == static_cast<typename UnsignedTraits<T>::type>(divider)) - log_div--; - - multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider); - shift1 = log_div > 1 ? 1 : log_div; - shift2 = log_div > 1 ? log_div-1 : 0; - } - - // Must have 0 <= numerator. On platforms that dont support the __uint128_t - // type numerator should also be less than 2^32-1. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { - eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2); - //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above - - UnsignedType t1 = muluh(multiplier, numerator); - UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1; - return (t1 + t) >> shift2; - } - - private: - typedef typename DividerTraits<T>::type UnsignedType; - UnsignedType multiplier; - int32_t shift1; - int32_t shift2; -}; - - -// Optimized version for signed 32 bit integers. -// Derived from Hacker's Delight. -// Only works for divisors strictly greater than one -template <> -class TensorIntDivisor<int32_t, true> { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { - magic = 0; - shift = 0; - } - // Must have 2 <= divider - EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) { - eigen_assert(divider >= 2); - calcMagic(divider); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { -#ifdef __CUDA_ARCH__ - return (__umulhi(magic, n) >> shift); -#else - uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n); - return (static_cast<uint32_t>(v >> 32) >> shift); -#endif - } - -private: - // Compute the magic numbers. See Hacker's Delight section 10 for an in - // depth explanation. - EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { - const unsigned two31 = 0x80000000; // 2**31. - unsigned ad = d; - unsigned t = two31 + (ad >> 31); - unsigned anc = t - 1 - t%ad; // Absolute value of nc. - int p = 31; // Init. p. - unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. - unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). - unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. - unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). - unsigned delta = 0; - do { - p = p + 1; - q1 = 2*q1; // Update q1 = 2**p/|nc|. - r1 = 2*r1; // Update r1 = rem(2**p, |nc|). - if (r1 >= anc) { // (Must be an unsigned - q1 = q1 + 1; // comparison here). - r1 = r1 - anc;} - q2 = 2*q2; // Update q2 = 2**p/|d|. - r2 = 2*r2; // Update r2 = rem(2**p, |d|). - if (r2 >= ad) { // (Must be an unsigned - q2 = q2 + 1; // comparison here). 
- r2 = r2 - ad;} - delta = ad - r2; - } while (q1 < delta || (q1 == delta && r1 == 0)); - - magic = (unsigned)(q2 + 1); - shift = p - 32; - } - - uint32_t magic; - int32_t shift; -}; - - -template <typename T, bool div_gt_one> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { - return divisor.divide(numerator); -} - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h deleted file mode 100644 index cd0109e..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ /dev/null @@ -1,209 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H -#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H - -namespace Eigen { - -/** \class TensorLayoutSwap - * \ingroup CXX11_Tensor_Module - * - * \brief Swap the layout from col-major to row-major, or row-major - * to col-major, and invert the order of the dimensions. - * - * Beware: the dimensions are reversed by this operation. If you want to - * preserve the ordering of the dimensions, you need to combine this - * operation with a shuffle. - * - * \example: - * Tensor<float, 2, ColMajor> input(2, 4); - * Tensor<float, 2, RowMajor> output = input.swap_layout(); - * eigen_assert(output.dimension(0) == 4); - * eigen_assert(output.dimension(1) == 2); - * - * array<int, 2> shuffle(1, 0); - * output = input.swap_layout().shuffle(shuffle); - * eigen_assert(output.dimension(0) == 2); - * eigen_assert(output.dimension(1) == 4); - * - */ -namespace internal { -template<typename XprType> -struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = traits<XprType>::NumDimensions; - static const int Layout = (traits<XprType>::Layout == ColMajor) ? 
RowMajor : ColMajor; -}; - -template<typename XprType> -struct eval<TensorLayoutSwapOp<XprType>, Eigen::Dense> -{ - typedef const TensorLayoutSwapOp<XprType>& type; -}; - -template<typename XprType> -struct nested<TensorLayoutSwapOp<XprType>, 1, typename eval<TensorLayoutSwapOp<XprType> >::type> -{ - typedef TensorLayoutSwapOp<XprType> type; -}; - -} // end namespace internal - - - -template<typename XprType> -class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested; - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) - : m_xpr(expr) {} - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) - { - typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; -}; - - -// Eval as rvalue -template<typename ArgType, typename Device> -struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> -{ - typedef TensorLayoutSwapOp<ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? 
RowMajor : ColMajor, - CoordAccess = false, // to be implemented - RawAccess = TensorEvaluator<ArgType, Device>::RawAccess - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - for(int i = 0; i < NumDims; ++i) { - m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; - } - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - return m_impl.evalSubExprsIfNeeded(data); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_impl.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return m_impl.costPerCoeff(vectorized); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; - Dimensions m_dimensions; -}; - - -// Eval as lvalue -template<typename ArgType, typename Device> - struct TensorEvaluator<TensorLayoutSwapOp<ArgType>, Device> - : public TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> -{ - typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base; - typedef TensorLayoutSwapOp<ArgType> XprType; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor, - CoordAccess = false // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(index); - } - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - this->m_impl.template writePacket<StoreMode>(index, x); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h deleted file mode 100644 index ee0078b..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h +++ /dev/null @@ -1,54 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H -#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H - - -/** use this macro in sfinae selection in templated functions - * - * template<typename T, - * typename std::enable_if< isBanana<T>::value , int >::type = 0 - * > - * void foo(){} - * - * becomes => - * - * template<typename TopoType, - * SFINAE_ENABLE_IF( isBanana<T>::value ) - * > - * void foo(){} - */ - -// SFINAE requires variadic templates -#ifndef __CUDACC__ -#if EIGEN_HAS_VARIADIC_TEMPLATES - // SFINAE doesn't work for gcc <= 4.7 - #ifdef EIGEN_COMP_GNUC - #if EIGEN_GNUC_AT_LEAST(4,8) - #define EIGEN_HAS_SFINAE - #endif - #else - #define EIGEN_HAS_SFINAE - #endif -#endif -#endif - -#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ - typename internal::enable_if< ( __condition__ ) , int >::type = 0 - - -#if EIGEN_HAS_CONSTEXPR -#define EIGEN_CONSTEXPR constexpr -#else -#define EIGEN_CONSTEXPR -#endif - - -#endif diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h deleted file mode 100644 index e4fc86a..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ /dev/null @@ -1,323 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H -#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H - -namespace Eigen { - -// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_) - -/** \class TensorMap - * \ingroup CXX11_Tensor_Module - * - * \brief A tensor expression mapping an existing array of data. - * - */ -/// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer. -/// It is added due to the fact that for our device compiler `T*` is not allowed. -/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`. -/// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_<T>` is `T*` . -/// Therefore, by adding the default value, we managed to convert the type and it does not break any -/// existing code as its default value is `T*`. 
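As a usage sketch of the class defined next (assuming the unsupported CXX11 Tensor module is on the include path; defaults such as Options_ may differ between Eigen versions), TensorMap views an existing buffer as a tensor without copying it:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  // An existing, externally owned buffer of six floats...
  float data[6] = {0, 1, 2, 3, 4, 5};

  // ...viewed as a 2x3 tensor without copying. The map only stores the pointer
  // and the dimensions; with the default MakePointer_ the stored type is float*.
  Eigen::TensorMap<Eigen::Tensor<float, 2> > t(data, 2, 3);

  assert(t.size() == 6);
  assert(t.dimension(0) == 2 && t.dimension(1) == 3);

  t(1, 2) = 42.0f;           // writes through to the underlying buffer
  assert(data[5] == 42.0f);  // column-major layout: 1 + 2 * 2 == 5
  return 0;
}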
-template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> > -{ - public: - typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self; - typedef typename PlainObjectType::Base Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; - typedef typename internal::traits<PlainObjectType>::Index Index; - typedef typename internal::traits<PlainObjectType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - - /* typedef typename internal::conditional< - bool(internal::is_lvalue<PlainObjectType>::value), - Scalar *, - const Scalar *>::type - PointerType;*/ - typedef typename MakePointer_<Scalar>::Type PointerType; - typedef PointerType PointerArgType; - - static const int Options = Options_; - - static const Index NumIndices = PlainObjectType::NumIndices; - typedef typename PlainObjectType::Dimensions Dimensions; - - enum { - IsAligned = ((int(Options_)&Aligned)==Aligned), - Layout = PlainObjectType::Layout, - CoordAccess = true, - RawAccess = true - }; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. - EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { - // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
- EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { - EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { - EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { - EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { - EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array<Index, NumIndices>& dimensions) - : m_data(dataPtr), m_dimensions(dimensions) - { } - - template <typename Dimensions> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) - : m_data(dataPtr), m_dimensions(dimensions) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) - : m_data(tensor.data()), m_dimensions(tensor.dimensions()) - { } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE PointerType data() { return m_data; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const - { - // eigen_assert(checkIndexRange(indices)); - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(indices); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(indices); - return m_data[index]; - } - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()() const - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) - return m_data[0]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const - { - eigen_internal_assert(index >= 0 && index < size()); - return m_data[index]; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) const - { - EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}}); - return m_data[index]; - } - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[1]; - return m_data[index]; - } else { - const Index index = i0 + i1 * m_dimensions[0]; - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); - return m_data[index]; - } - } -#endif - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices) - { - // eigen_assert(checkIndexRange(indices)); - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(indices); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(indices); - return m_data[index]; - } - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()() - { - EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) - return m_data[0]; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index index) - { - eigen_internal_assert(index >= 0 && index < size()); - return m_data[index]; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) - { - static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - const std::size_t NumDims = sizeof...(otherIndices) + 2; - if (PlainObjectType::Options&RowMajor) { - const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}}); - return m_data[index]; - } else { - const Index index = m_dimensions.IndexOfColMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}}); - return m_data[index]; - } - } -#else - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i1 + i0 * m_dimensions[1]; - return m_data[index]; - } else { - const Index index = i0 + i1 * m_dimensions[0]; - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); - return m_data[index]; - } - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) - { - if (PlainObjectType::Options&RowMajor) { - const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); - return m_data[index]; - } else { - const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); - return m_data[index]; - } - } -#endif - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other) - { - typedef TensorAssignOp<Self, const Self> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Self& operator=(const OtherDerived& other) - { - typedef TensorAssignOp<Self, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - private: - typename MakePointer_<Scalar>::Type m_data; - Dimensions m_dimensions; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h deleted file mode 100644 index 615559d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ /dev/null @@ -1,218 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
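The operator() overloads removed above linearize a coordinate with the usual column-major or row-major recurrences; spelled out for a rank-3 tensor in a small standalone check (illustrative helpers, not Eigen API):

#include <array>
#include <cassert>

// The column-major and row-major linearizations used by the removed
// operator() overloads, written out for a rank-3 tensor with dimensions d.
long colMajorIndex(long i0, long i1, long i2, const std::array<long, 3>& d) {
  return i0 + d[0] * (i1 + d[1] * i2);
}
long rowMajorIndex(long i0, long i1, long i2, const std::array<long, 3>& d) {
  return i2 + d[2] * (i1 + d[1] * i0);
}

int main() {
  const std::array<long, 3> dims = {2, 3, 4};
  // The last coordinate of a 2x3x4 tensor is (1, 2, 3); both layouts place it
  // at linear index 23 even though interior coordinates map differently.
  assert(colMajorIndex(1, 2, 3, dims) == 23);  // 1 + 2 * (2 + 3 * 3)
  assert(rowMajorIndex(1, 2, 3, dims) == 23);  // 3 + 4 * (2 + 3 * 1)
  assert(colMajorIndex(1, 0, 0, dims) == 1);
  assert(rowMajorIndex(1, 0, 0, dims) == 12);  // 0 + 4 * (0 + 3 * 1)
  return 0;
}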
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H -#define EIGEN_CXX11_TENSOR_TENSOR_META_H - -namespace Eigen { - -template<bool cond> struct Cond {}; - -template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T1& choose(Cond<true>, const T1& first, const T2&) { - return first; -} - -template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T2& choose(Cond<false>, const T1&, const T2& second) { - return second; -} - - -template <typename T, typename X, typename Y> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T divup(const X x, const Y y) { - return static_cast<T>((x + y - 1) / y); -} - -template <typename T> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T divup(const T x, const T y) { - return static_cast<T>((x + y - 1) / y); -} - -template <size_t n> struct max_n_1 { - static const size_t size = n; -}; -template <> struct max_n_1<0> { - static const size_t size = 1; -}; - - -// Default packet types -template <typename Scalar, typename Device> -struct PacketType : internal::packet_traits<Scalar> { - typedef typename internal::packet_traits<Scalar>::type type; -}; - -// For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) -template <> -struct PacketType<half, GpuDevice> { - typedef half2 type; - static const int size = 2; - enum { - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasNegate = 1, - HasAbs = 1, - HasArg = 0, - HasAbs2 = 0, - HasMin = 1, - HasMax = 1, - HasConj = 0, - HasSetLinear = 0, - HasBlend = 0, - - HasDiv = 1, - HasSqrt = 1, - HasRsqrt = 1, - HasExp = 1, - HasLog = 1, - HasLog1p = 0, - HasLog10 = 0, - HasPow = 1, - }; -}; -#endif - -#if defined(EIGEN_USE_SYCL) -template <typename T> - struct PacketType<T, SyclDevice> { - typedef T type; - static const int size = 1; - enum { - HasAdd = 0, - HasSub = 0, - HasMul = 0, - HasNegate = 0, - HasAbs = 0, - HasArg = 0, - HasAbs2 = 0, - HasMin = 0, - HasMax = 0, - HasConj = 0, - HasSetLinear = 0, - HasBlend = 0 - }; -}; -#endif - - -// Tuple mimics std::pair but works on e.g. nvcc. 
-template <typename U, typename V> struct Tuple { - public: - U first; - V second; - - typedef U first_type; - typedef V second_type; - - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple() : first(), second() {} - - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple(const U& f, const V& s) : first(f), second(s) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Tuple& operator= (const Tuple& rhs) { - if (&rhs == this) return *this; - first = rhs.first; - second = rhs.second; - return *this; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void swap(Tuple& rhs) { - using numext::swap; - swap(first, rhs.first); - swap(second, rhs.second); - } -}; - -template <typename U, typename V> -EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) { - return (x.first == y.first && x.second == y.second); -} - -template <typename U, typename V> -EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) { - return !(x == y); -} - - -// Can't use std::pairs on cuda devices -template <typename Idx> struct IndexPair { - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} - - EIGEN_DEVICE_FUNC void set(IndexPair<Idx> val) { - first = val.first; - second = val.second; - } - - Idx first; - Idx second; -}; - - -#ifdef EIGEN_HAS_SFINAE -namespace internal { - - template<typename IndexType, Index... Is> - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) { - return { idx[Is]... }; - } - template<typename IndexType> - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) { - return array<Index, 0>(); - } - - /** Make an array (for index/dimensions) out of a custom index */ - template<typename Index, std::size_t NumIndices, typename IndexType> - EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - array<Index, NumIndices> customIndices2Array(IndexType& idx) { - return customIndices2Array(idx, typename gen_numeric_list<Index, NumIndices>::type{}); - } - - - template <typename B, typename D> - struct is_base_of - { - - typedef char (&yes)[1]; - typedef char (&no)[2]; - - template <typename BB, typename DD> - struct Host - { - operator BB*() const; - operator DD*(); - }; - - template<typename T> - static yes check(D*, T); - static no check(B*, int); - - static const bool value = sizeof(check(Host<B,D>(), int())) == sizeof(yes); - }; - -} -#endif - - - -} // namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h deleted file mode 100644 index d34f1e3..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ /dev/null @@ -1,888 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
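// --- illustration (not part of the original file) ---------------------------
// A small standalone sketch of the ceil-division helper declared in
// TensorMeta.h above: divup(x, y) == (x + y - 1) / y, i.e. the number of
// y-sized blocks needed to cover x elements (the formula rounds the quotient
// up instead of down). The concrete values are arbitrary and only serve to
// check the identity.
#include <cassert>

template <typename T>
T divup_sketch(T x, T y) {
  // same formula as the divup defined above
  return (x + y - 1) / y;
}

int main() {
  assert(divup_sketch(10, 4) == 3);  // 10 elements need 3 blocks of 4
  assert(divup_sketch(8, 4) == 2);   // exact multiples are not rounded up
  assert(divup_sketch(1, 4) == 1);   // a partial block still counts as one
  return 0;
}
// -----------------------------------------------------------------------------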
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H -#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H - -namespace Eigen { - -/** \class TensorReshaping - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reshaping class. - * - * - */ -namespace internal { -template<typename NewDimensions, typename XprType> -struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = array_size<NewDimensions>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename NewDimensions, typename XprType> -struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorReshapingOp<NewDimensions, XprType>& type; -}; - -template<typename NewDimensions, typename XprType> -struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type> -{ - typedef TensorReshapingOp<NewDimensions, XprType> type; -}; - -} // end namespace internal - - - -template<typename NewDimensions, typename XprType> -class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) - : m_xpr(expr), m_dims(dims) {} - - EIGEN_DEVICE_FUNC - const NewDimensions& dimensions() const { return m_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other) - { - typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const NewDimensions m_dims; -}; - - -// Eval as rvalue -template<typename NewDimensions, typename ArgType, typename Device> -struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> -{ - typedef TensorReshapingOp<NewDimensions, ArgType> XprType; - typedef NewDimensions Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = TensorEvaluator<ArgType, Device>::RawAccess - }; - - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_dimensions(op.dimensions()) - { - // The total size of the reshaped tensor must be equal to the total size - // of the input tensor. - eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - return m_impl.evalSubExprsIfNeeded(data); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(index); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - return m_impl.template packet<LoadMode>(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return m_impl.costPerCoeff(vectorized); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); } - - EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - protected: - TensorEvaluator<ArgType, Device> m_impl; - NewDimensions m_dimensions; -}; - - -// Eval as lvalue -template<typename NewDimensions, typename ArgType, typename Device> - struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device> - : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> - -{ - typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base; - typedef TensorReshapingOp<NewDimensions, ArgType> XprType; - typedef NewDimensions Dimensions; - - enum { - IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = TensorEvaluator<ArgType, Device>::RawAccess - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(index); - } - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - this->m_impl.template writePacket<StoreMode>(index, x); - } -}; - - -/** \class TensorSlicing - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor slicing class. 
- * - * - */ -namespace internal { -template<typename StartIndices, typename Sizes, typename XprType> -struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = array_size<StartIndices>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename StartIndices, typename Sizes, typename XprType> -struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> -{ - typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type; -}; - -template<typename StartIndices, typename Sizes, typename XprType> -struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type> -{ - typedef TensorSlicingOp<StartIndices, Sizes, XprType> type; -}; - -} // end namespace internal - - - -template<typename StartIndices, typename Sizes, typename XprType> -class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) - : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} - - EIGEN_DEVICE_FUNC - const StartIndices& startIndices() const { return m_indices; } - EIGEN_DEVICE_FUNC - const Sizes& sizes() const { return m_sizes; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other) - { - typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - - protected: - typename XprType::Nested m_xpr; - const StartIndices m_indices; - const Sizes m_sizes; -}; - - -// Fixme: figure out the exact threshold -namespace { -template <typename Index, typename Device> struct MemcpyTriggerForSlicing { - EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } - EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; } - - private: - Index threshold_; -}; - -// It is very expensive to start the memcpy kernel on GPU: we therefore only -// use it for large copies. 
-#ifdef EIGEN_USE_GPU -template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> { - EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } - EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } -}; -#endif -} - -// Eval as rvalue -template<typename StartIndices, typename Sizes, typename ArgType, typename Device> -struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> -{ - typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; - static const int NumDims = internal::array_size<Sizes>::value; - - enum { - // Alignment can't be guaranteed at compile time since it depends on the - // slice offsets and sizes. - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) - { - for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { - eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - const Sizes& output_dims = op.sizes(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - } - - // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } else { - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - } - - // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]); - } - } - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef Sizes Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { - m_impl.evalSubExprsIfNeeded(NULL); - if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) { - Index contiguous_values = 1; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - contiguous_values *= dimensions()[i]; - if (dimensions()[i] != m_impl.dimensions()[i]) { - break; - } - } - } else { - for (int i = NumDims-1; i >= 0; --i) { - contiguous_values *= dimensions()[i]; - if (dimensions()[i] != m_impl.dimensions()[i]) { - break; - } - } - } - // Use memcpy if it's going to be faster than using the regular evaluation. 
- const MemcpyTriggerForSlicing<Index, Device> trigger(m_device); - if (trigger(contiguous_values)) { - Scalar* src = (Scalar*)m_impl.data(); - for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { - Index offset = srcCoeff(i); - m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); - } - return false; - } - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_fastOutputStrides[i]; - const Index idx1 = indices[1] / m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; - inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + m_offsets[0]); - inputIndices[1] += (indices[1] + m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_fastOutputStrides[i]; - const Index idx1 = indices[1] / m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; - inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); - inputIndices[1] += (indices[1] + m_offsets[NumDims-1]); - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); - } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { - Scalar* result = m_impl.data(); - if (result) { - Index offset = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) { - if (m_dimensions[i] != m_impl.dimensions()[i]) { - offset += m_offsets[i] * m_inputStrides[i]; - for (int j = i+1; j < NumDims; ++j) { - if (m_dimensions[j] > 1) { - return NULL; - } - offset += m_offsets[j] * m_inputStrides[j]; - } - break; - } - } - } else { - for (int i = NumDims - 1; i >= 0; --i) { - if (m_dimensions[i] != m_impl.dimensions()[i]) { - offset += m_offsets[i] * m_inputStrides[i]; - for (int j = i-1; j >= 0; --j) { - if (m_dimensions[j] > 1) { - return NULL; - } - offset += m_offsets[j] * m_inputStrides[j]; - } - break; 
- } - } - } - return result + offset; - } - return NULL; - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += (index + m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += (index + m_offsets[NumDims-1]); - } - return inputIndex; - } - - array<Index, NumDims> m_outputStrides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - Dimensions m_dimensions; - const StartIndices m_offsets; -}; - - -// Eval as lvalue -template<typename StartIndices, typename Sizes, typename ArgType, typename Device> -struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> - : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base; - typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; - static const int NumDims = internal::array_size<Sizes>::value; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef Sizes Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + packetSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; - const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; - inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + this->m_offsets[0]); - inputIndices[1] += (indices[1] + this->m_offsets[0]); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; - const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; - inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; - inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; - indices[0] -= 
idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); - inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); - } - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { - this->m_impl.template writePacket<StoreMode>(inputIndices[0], x); - } - else { - EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - this->m_impl.coeffRef(inputIndices[0]) = values[0]; - this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; - for (int i = 1; i < packetSize-1; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - } -}; - - - -namespace internal { -template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> -struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = array_size<StartIndices>::value; - static const int Layout = XprTraits::Layout; -}; - -template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> -struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> -{ - typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type; -}; - -template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> -struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type> -{ - typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type; -}; - -} // end namespace internal - - -template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> -class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > -{ - public: - typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename internal::nested<TensorStridingSlicingOp>::type Nested; - typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind; - typedef typename internal::traits<TensorStridingSlicingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( - const XprType& expr, const StartIndices& startIndices, - const StopIndices& stopIndices, const Strides& strides) - : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), - m_strides(strides) {} - - EIGEN_DEVICE_FUNC - const StartIndices& startIndices() const { return m_startIndices; } - EIGEN_DEVICE_FUNC - const StartIndices& stopIndices() const { return m_stopIndices; } - EIGEN_DEVICE_FUNC - const StartIndices& strides() const { return m_strides; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other) - { - typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign; - Assign assign(*this, other); - 
internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run( - assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const StartIndices m_startIndices; - const StopIndices m_stopIndices; - const Strides m_strides; -}; - -// Eval as rvalue -template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> -{ - typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; - static const int NumDims = internal::array_size<Strides>::value; - - enum { - // Alignment can't be guaranteed at compile time since it depends on the - // slice offsets and sizes. - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) - { - // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero - DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; - for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { - eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); - if(m_strides[i]>0){ - startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); - stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); - }else{ - /* implies m_strides[i]<0 by assert */ - startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); - stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); - } - m_startIndices[i] = startIndicesClamped[i]; - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - - // check for degenerate intervals and compute output tensor shape - bool degenerate = false;; - for(int i = 0; i < NumDims; i++){ - Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; - if(interval == 0 || ((interval<0) != (m_strides[i]<0))){ - m_dimensions[i] = 0; - degenerate = true; - }else{ - m_dimensions[i] = interval / m_strides[i] - + (interval % m_strides[i] != 0 ? 1 : 0); - eigen_assert(m_dimensions[i] >= 0); - } - } - Strides output_dims = m_dimensions; - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = m_strides[0]; - m_offsets[0] = startIndicesClamped[0]; - Index previousDimProduct = 1; - for (int i = 1; i < NumDims; ++i) { - previousDimProduct *= input_dims[i-1]; - m_inputStrides[i] = previousDimProduct * m_strides[i]; - m_offsets[i] = startIndicesClamped[i] * previousDimProduct; - } - - // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; - // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 
1 : m_outputStrides[i]); - } - } else { - m_inputStrides[NumDims-1] = m_strides[NumDims-1]; - m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; - Index previousDimProduct = 1; - for (int i = NumDims - 2; i >= 0; --i) { - previousDimProduct *= input_dims[i+1]; - m_inputStrides[i] = previousDimProduct * m_strides[i]; - m_offsets[i] = startIndicesClamped[i] * previousDimProduct; - } - - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; - // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash - m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]); - } - } - m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1), - device.lastLevelCacheSize() / - sizeof(Scalar)); - } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef Strides Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { - return NULL; - } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i >= 0; --i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += idx * m_inputStrides[i] + m_offsets[i]; - index -= idx * m_outputStrides[i]; - } - } else { - for (int i = 0; i < NumDims; ++i) { - const Index idx = index / m_fastOutputStrides[i]; - inputIndex += idx * m_inputStrides[i] + m_offsets[i]; - index -= idx * m_outputStrides[i]; - } - } - return inputIndex; - } - - static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { - return numext::maxi(min, numext::mini(max,value)); - } - - array<Index, NumDims> m_outputStrides; - array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - DSizes<Index, NumDims> m_startIndices; // clamped startIndices - DSizes<Index, NumDims> m_dimensions; - DSizes<Index, NumDims> m_offsets; // offset in a flattened shape - const Strides m_strides; - std::size_t m_block_total_size_max; -}; - -// Eval as lvalue -template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> - : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> -{ - typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, 
Device> Base; - typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; - static const int NumDims = internal::array_size<Strides>::value; - - enum { - IsAligned = false, - PacketAccess = false, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename internal::remove_const<Scalar>::type ScalarNonConst; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef Strides Dimensions; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h deleted file mode 100644 index 647bcf1..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ /dev/null @@ -1,397 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H -#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H - -namespace Eigen { - -/** \class TensorPadding - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor padding class. - * At the moment only padding with a constant value is supported. 
- * - */ -namespace internal { -template<typename PaddingDimensions, typename XprType> -struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename PaddingDimensions, typename XprType> -struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorPaddingOp<PaddingDimensions, XprType>& type; -}; - -template<typename PaddingDimensions, typename XprType> -struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type> -{ - typedef TensorPaddingOp<PaddingDimensions, XprType> type; -}; - -} // end namespace internal - - - -template<typename PaddingDimensions, typename XprType> -class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value) - : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - const PaddingDimensions& padding() const { return m_padding_dims; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const PaddingDimensions m_padding_dims; - const Scalar m_padding_value; -}; - - -// Eval as rvalue -template<typename PaddingDimensions, typename ArgType, typename Device> -struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device> -{ - typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<PaddingDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = true, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = true, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()) - { - // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead - // to a vector, which doesn't make sense. 
Instead one should reshape the scalar into a vector - // of 1 element first and then pad. - EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - // Compute dimensions - m_dimensions = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] += m_padding[i].first + m_padding[i].second; - } - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; - } else { - m_inputStrides[NumDims - 1] = 1; - m_outputStrides[NumDims] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; - } - m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - eigen_assert(index < dimensions().TotalSize()); - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - if (isPaddingAtIndexForDim(idx, i)) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (isPaddingAtIndexForDim(index, 0)) { - return m_paddingValue; - } - inputIndex += (index - m_padding[0].first); - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i+1]; - if (isPaddingAtIndexForDim(idx, i)) { - return m_paddingValue; - } - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i+1]; - } - if (isPaddingAtIndexForDim(index, NumDims-1)) { - return m_paddingValue; - } - inputIndex += (index - m_padding[NumDims-1].first); - } - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return packetColMajor(index); - } - return packetRowMajor(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - TensorOpCost cost = m_impl.costPerCoeff(vectorized); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims; ++i) - updateCostPerDimension(cost, i, i == 0); - } else { - for (int i = NumDims - 1; i >= 0; --i) - updateCostPerDimension(cost, i, i == NumDims - 1); - } - return cost; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( - Index index, int dim_index) const { -#if defined(EIGEN_HAS_INDEX_LIST) - return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) && - index < m_padding[dim_index].first) || - 
(!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) && - index >= m_dimensions[dim_index] - m_padding[dim_index].second); -#else - return (index < m_padding[dim_index].first) || - (index >= m_dimensions[dim_index] - m_padding[dim_index].second); -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( - int dim_index) const { -#if defined(EIGEN_HAS_INDEX_LIST) - return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0); -#else - EIGEN_UNUSED_VARIABLE(dim_index); - return false; -#endif - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( - int dim_index) const { -#if defined(EIGEN_HAS_INDEX_LIST) - return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0); -#else - EIGEN_UNUSED_VARIABLE(dim_index); - return false; -#endif - } - - - void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { - const double in = static_cast<double>(m_impl.dimensions()[i]); - const double out = in + m_padding[i].first + m_padding[i].second; - if (out == 0) - return; - const double reduction = in / out; - cost *= reduction; - if (first) { - cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + - reduction * (1 * TensorOpCost::AddCost<Index>())); - } else { - cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() + - 2 * TensorOpCost::MulCost<Index>() + - reduction * (2 * TensorOpCost::MulCost<Index>() + - 1 * TensorOpCost::DivCost<Index>())); - } - } - - protected: - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - const Index initialIndex = index; - Index inputIndex = 0; - for (int i = NumDims - 1; i > 0; --i) { - const Index first = index; - const Index last = index + PacketSize - 1; - const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; - const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; - const Index lastPaddedRight = m_outputStrides[i+1]; - - if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { - // all the coefficient are between the 2 padding zones. - const Index idx = index / m_outputStrides[i]; - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - else { - // Every other case - return packetWithPossibleZero(initialIndex); - } - } - - const Index last = index + PacketSize - 1; - const Index first = index; - const Index lastPaddedLeft = m_padding[0].first; - const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); - const Index lastPaddedRight = m_outputStrides[1]; - - if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { - // all the coefficient are in the padding zone. 
- return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { - // all the coefficient are between the 2 padding zones. - inputIndex += (index - m_padding[0].first); - return m_impl.template packet<Unaligned>(inputIndex); - } - // Every other case - return packetWithPossibleZero(initialIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - const Index initialIndex = index; - Index inputIndex = 0; - - for (int i = 0; i < NumDims - 1; ++i) { - const Index first = index; - const Index last = index + PacketSize - 1; - const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; - const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; - const Index lastPaddedRight = m_outputStrides[i]; - - if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { - // all the coefficient are between the 2 padding zones. - const Index idx = index / m_outputStrides[i+1]; - inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; - index -= idx * m_outputStrides[i+1]; - } - else { - // Every other case - return packetWithPossibleZero(initialIndex); - } - } - - const Index last = index + PacketSize - 1; - const Index first = index; - const Index lastPaddedLeft = m_padding[NumDims-1].first; - const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); - const Index lastPaddedRight = m_outputStrides[NumDims-1]; - - if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { - // all the coefficient are in the padding zone. - return internal::pset1<PacketReturnType>(m_paddingValue); - } - else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { - // all the coefficient are between the 2 padding zones. 
- inputIndex += (index - m_padding[NumDims-1].first); - return m_impl.template packet<Unaligned>(inputIndex); - } - // Every other case - return packetWithPossibleZero(initialIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - array<Index, NumDims+1> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; - PaddingDimensions m_padding; - - Scalar m_paddingValue; -}; - - - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h deleted file mode 100644 index 886a254..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ /dev/null @@ -1,269 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H - -namespace Eigen { - -/** \class TensorPatch - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor patch class. - * - * - */ -namespace internal { -template<typename PatchDim, typename XprType> -struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<typename PatchDim, typename XprType> -struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense> -{ - typedef const TensorPatchOp<PatchDim, XprType>& type; -}; - -template<typename PatchDim, typename XprType> -struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type> -{ - typedef TensorPatchOp<PatchDim, XprType> type; -}; - -} // end namespace internal - - - -template<typename PatchDim, typename XprType> -class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) - : m_xpr(expr), m_patch_dims(patch_dims) {} - - EIGEN_DEVICE_FUNC - const PatchDim& patch_dims() const { return m_patch_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename 
XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const PatchDim m_patch_dims; -}; - - -// Eval as rvalue -template<typename PatchDim, typename ArgType, typename Device> -struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> -{ - typedef TensorPatchOp<PatchDim, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - Index num_patches = 1; - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - const PatchDim& patch_dims = op.patch_dims(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < NumDims-1; ++i) { - m_dimensions[i] = patch_dims[i]; - num_patches *= (input_dims[i] - patch_dims[i] + 1); - } - m_dimensions[NumDims-1] = num_patches; - - m_inputStrides[0] = 1; - m_patchStrides[0] = 1; - for (int i = 1; i < NumDims-1; ++i) { - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); - } - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - } - } else { - for (int i = 0; i < NumDims-1; ++i) { - m_dimensions[i+1] = patch_dims[i]; - num_patches *= (input_dims[i] - patch_dims[i] + 1); - } - m_dimensions[0] = num_patches; - - m_inputStrides[NumDims-2] = 1; - m_patchStrides[NumDims-2] = 1; - for (int i = NumDims-3; i >= 0; --i) { - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); - } - m_outputStrides[NumDims-1] = 1; - for (int i = NumDims-2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; - // Find the location of the first element of the patch. - Index patchIndex = index / m_outputStrides[output_stride_index]; - // Find the offset of the element wrt the location of the first element. 
- Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = patchOffset / m_outputStrides[i]; - patchOffset -= offsetIdx * m_outputStrides[i]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx = patchIndex / m_patchStrides[i]; - patchIndex -= patchIdx * m_patchStrides[i]; - const Index offsetIdx = patchOffset / m_outputStrides[i+1]; - patchOffset -= offsetIdx * m_outputStrides[i+1]; - inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; - } - } - inputIndex += (patchIndex + patchOffset); - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; - Index indices[2] = {index, index + PacketSize - 1}; - Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], - indices[1] / m_outputStrides[output_stride_index]}; - Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], - indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; - - Index inputIndices[2] = {0, 0}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 2; i > 0; --i) { - const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], - patchIndices[1] / m_patchStrides[i]}; - patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; - patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; - - const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], - patchOffsets[1] / m_outputStrides[i]}; - patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; - patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; - - inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; - inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; - } - } else { - for (int i = 0; i < NumDims - 2; ++i) { - const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], - patchIndices[1] / m_patchStrides[i]}; - patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; - patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; - - const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], - patchOffsets[1] / m_outputStrides[i+1]}; - patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; - patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; - - inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; - inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; - } - } - inputIndices[0] += (patchIndices[0] + patchOffsets[0]); - inputIndices[1] += (patchIndices[1] + patchOffsets[1]); - - if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { - PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[PacketSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < PacketSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = 
internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - const double compute_cost = NumDims * (TensorOpCost::DivCost<Index>() + - TensorOpCost::MulCost<Index>() + - 2 * TensorOpCost::AddCost<Index>()); - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims-1> m_inputStrides; - array<Index, NumDims-1> m_patchStrides; - - TensorEvaluator<ArgType, Device> m_impl; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h deleted file mode 100644 index 1655a81..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +++ /dev/null @@ -1,276 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H -#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H - -namespace Eigen { -namespace internal { - -namespace { - -EIGEN_DEVICE_FUNC uint64_t get_random_seed() { -#ifdef __CUDA_ARCH__ - // We don't support 3d kernels since we currently only use 1 and - // 2d kernels. - assert(threadIdx.z == 0); - return clock64() + - blockIdx.x * blockDim.x + threadIdx.x + - gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); - -#elif defined _WIN32 - // Use the current time as a baseline. - SYSTEMTIME st; - GetSystemTime(&st); - int time = st.wSecond + 1000 * st.wMilliseconds; - // Mix in a random number to make sure that we get different seeds if - // we try to generate seeds faster than the clock resolution. - // We need 2 random values since the generator only generate 16 bits at - // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) - int rnd1 = ::rand(); - int rnd2 = ::rand(); - uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; - return rnd; - -#elif defined __APPLE__ - // Same approach as for win32, except that the random number generator - // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). - uint64_t rnd = ::random() ^ mach_absolute_time(); - return rnd; - -#else - // Augment the current time with pseudo random number generation - // to ensure that we get different seeds if we try to generate seeds - // faster than the clock resolution. - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - uint64_t rnd = ::random() ^ ts.tv_nsec; - return rnd; -#endif -} - -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { - // TODO: Unify with the implementation in the non blocking thread pool. 
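// The routine below is one PCG step: the 64-bit state advances through a
// linear congruential update (multiply by 6364136223846793005 and add an odd
// increment), while the 32-bit output is derived from the previous state by
// an xorshift (current ^ (current >> 22)) followed by a right shift whose
// amount, 22 plus the top three state bits, varies per step (the "XSH-RS"
// output permutation).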
- uint64_t current = *state; - // Update the internal state - *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; - // Generate the random output (using the PCG-XSH-RS scheme) - return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); -} - -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { - seed = seed ? seed : get_random_seed(); - return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; -} - -} // namespace - - -template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -T RandomToTypeUniform(uint64_t* state) { - unsigned rnd = PCG_XSH_RS_generator(state); - return static_cast<T>(rnd); -} - - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) { - Eigen::half result; - // Generate 10 random bits for the mantissa - unsigned rnd = PCG_XSH_RS_generator(state); - result.x = static_cast<uint16_t>(rnd & 0x3ffu); - // Set the exponent - result.x |= (static_cast<uint16_t>(15) << 10); - // Return the final result - return result - Eigen::half(1.0f); -} - - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float RandomToTypeUniform<float>(uint64_t* state) { - typedef union { - uint32_t raw; - float fp; - } internal; - internal result; - // Generate 23 random bits for the mantissa mantissa - const unsigned rnd = PCG_XSH_RS_generator(state); - result.raw = rnd & 0x7fffffu; - // Set the exponent - result.raw |= (static_cast<uint32_t>(127) << 23); - // Return the final result - return result.fp - 1.0f; -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double RandomToTypeUniform<double>(uint64_t* state) { - typedef union { - uint64_t raw; - double dp; - } internal; - internal result; - result.raw = 0; - // Generate 52 random bits for the mantissa - // First generate the upper 20 bits - unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; - // The generate the lower 32 bits - unsigned rnd2 = PCG_XSH_RS_generator(state); - result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2; - // Set the exponent - result.raw |= (static_cast<uint64_t>(1023) << 52); - // Return the final result - return result.dp - 1.0; -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) { - return std::complex<float>(RandomToTypeUniform<float>(state), - RandomToTypeUniform<float>(state)); -} -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) { - return std::complex<double>(RandomToTypeUniform<double>(state), - RandomToTypeUniform<double>(state)); -} - -template <typename T> class UniformRandomGenerator { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. 
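// Each call to operator()(i) below copies the generator state, offsets it by
// the coefficient index, draws one value with PCG_XSH_RS_generator and stores
// the advanced state back; packetOp(i) does the same but fills an aligned
// scratch array with packetSize consecutive draws and loads it as a packet.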
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( - uint64_t seed = 0) { - m_state = PCG_XSH_RS_state(seed); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( - const UniformRandomGenerator& other) { - m_state = other.m_state; - } - - template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T operator()(Index i) const { - uint64_t local_state = m_state + i; - T result = RandomToTypeUniform<T>(&local_state); - m_state = local_state; - return result; - } - - template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(Index i) const { - const int packetSize = internal::unpacket_traits<Packet>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - uint64_t local_state = m_state + i; - for (int j = 0; j < packetSize; ++j) { - values[j] = RandomToTypeUniform<T>(&local_state); - } - m_state = local_state; - return internal::pload<Packet>(values); - } - - private: - mutable uint64_t m_state; -}; - -template <typename Scalar> -struct functor_traits<UniformRandomGenerator<Scalar> > { - enum { - // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). - Cost = 12 * NumTraits<Scalar>::AddCost * - ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), - PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess - }; -}; - - - -template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -T RandomToTypeNormal(uint64_t* state) { - // Use the ratio of uniform method to generate numbers following a normal - // distribution. See for example Numerical Recipes chapter 7.3.9 for the - // details. - T u, v, q; - do { - u = RandomToTypeUniform<T>(state); - v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5)); - const T x = u - T(0.449871); - const T y = numext::abs(v) + T(0.386595); - q = x*x + y * (T(0.196)*y - T(0.25472)*x); - } while (q > T(0.27597) && - (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); - - return v/u; -} - -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) { - return std::complex<float>(RandomToTypeNormal<float>(state), - RandomToTypeNormal<float>(state)); -} -template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) { - return std::complex<double>(RandomToTypeNormal<double>(state), - RandomToTypeNormal<double>(state)); -} - - -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - // Uses the given "seed" if non-zero, otherwise uses a random seed. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { - m_state = PCG_XSH_RS_state(seed); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( - const NormalRandomGenerator& other) { - m_state = other.m_state; - } - - template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T operator()(Index i) const { - uint64_t local_state = m_state + i; - T result = RandomToTypeNormal<T>(&local_state); - m_state = local_state; - return result; - } - - template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(Index i) const { - const int packetSize = internal::unpacket_traits<Packet>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - uint64_t local_state = m_state + i; - for (int j = 0; j < packetSize; ++j) { - values[j] = RandomToTypeNormal<T>(&local_state); - } - m_state = local_state; - return internal::pload<Packet>(values); - } - - private: - mutable uint64_t m_state; -}; - - -template <typename Scalar> -struct functor_traits<NormalRandomGenerator<Scalar> > { - enum { - // On average, we need to generate about 3 random numbers - // 15 mul, 8 add, 1.5 logs - Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + - 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost + - 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2, - PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess - }; -}; - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h deleted file mode 100644 index 41d0d00..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ /dev/null @@ -1,781 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H -#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H - -namespace Eigen { - -/** \class TensorReduction - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reduction class. - * - */ - -namespace internal { - template<typename Op, typename Dims, typename XprType,template <class> class MakePointer_ > - struct traits<TensorReductionOp<Op, Dims, XprType, MakePointer_> > - : traits<XprType> -{ - typedef traits<XprType> XprTraits; - typedef typename XprTraits::Scalar Scalar; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value; - static const int Layout = XprTraits::Layout; - - template <class T> struct MakePointer { - // Intermediate typedef to workaround MSVC issue. 
- typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - }; -}; - -template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> -struct eval<TensorReductionOp<Op, Dims, XprType, MakePointer_>, Eigen::Dense> -{ - typedef const TensorReductionOp<Op, Dims, XprType, MakePointer_>& type; -}; - -template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_> -struct nested<TensorReductionOp<Op, Dims, XprType, MakePointer_>, 1, typename eval<TensorReductionOp<Op, Dims, XprType, MakePointer_> >::type> -{ - typedef TensorReductionOp<Op, Dims, XprType, MakePointer_> type; -}; - - -template <typename OutputDims> struct DimInitializer { - template <typename InputDims, typename ReducedDims> EIGEN_DEVICE_FUNC - static void run(const InputDims& input_dims, - const array<bool, internal::array_size<InputDims>::value>& reduced, - OutputDims* output_dims, ReducedDims* reduced_dims) { - const int NumInputDims = internal::array_size<InputDims>::value; - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (reduced[i]) { - (*reduced_dims)[reduceIndex] = input_dims[i]; - ++reduceIndex; - } else { - (*output_dims)[outputIndex] = input_dims[i]; - ++outputIndex; - } - } - } -}; - -template <> struct DimInitializer<Sizes<> > { - template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC - static void run(const InputDims& input_dims, const array<bool, Rank>&, - Sizes<>*, array<Index, Rank>* reduced_dims) { - const int NumInputDims = internal::array_size<InputDims>::value; - for (int i = 0; i < NumInputDims; ++i) { - (*reduced_dims)[i] = input_dims[i]; - } - } -}; - - -template <typename ReducedDims, int NumTensorDims, int Layout> -struct are_inner_most_dims { - static const bool value = false; -}; -template <typename ReducedDims, int NumTensorDims, int Layout> -struct preserve_inner_most_dims { - static const bool value = false; -}; - -#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES -template <typename ReducedDims, int NumTensorDims> -struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); - static const bool tmp2 = index_statically_eq<ReducedDims>(0, 0); - static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1); - static const bool value = tmp1 & tmp2 & tmp3; -}; -template <typename ReducedDims, int NumTensorDims> -struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); - static const bool tmp2 = index_statically_eq<ReducedDims>(0, NumTensorDims - array_size<ReducedDims>::value); - static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1); - static const bool value = tmp1 & tmp2 & tmp3; - -}; -template <typename ReducedDims, int NumTensorDims> -struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); - static const bool tmp2 = index_statically_gt<ReducedDims>(0, 0); - static const bool value = tmp1 & tmp2; - -}; -template <typename ReducedDims, int NumTensorDims> -struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{ - static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>(); - static const bool tmp2 = 
index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1); - static const bool value = tmp1 & tmp2; -}; -#endif - - -template <int DimIndex, typename Self, typename Op> -struct GenericDimReducer { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { - EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; - GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); - } - } -}; -template <typename Self, typename Op> -struct GenericDimReducer<0, Self, Op> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { - for (int j = 0; j < self.m_reducedDims[0]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; - reducer.reduce(self.m_impl.coeff(input), accum); - } - } -}; -template <typename Self, typename Op> -struct GenericDimReducer<-1, Self, Op> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { - reducer.reduce(self.m_impl.coeff(index), accum); - } -}; - -template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct InnerMostDimReducer { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { - typename Self::CoeffReturnType accum = reducer.initialize(); - for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { - reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); - } - return reducer.finalize(accum); - } -}; - -template <typename Self, typename Op> -struct InnerMostDimReducer<Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { - const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size; - const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; - typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>(); - for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { - reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p); - } - typename Self::CoeffReturnType accum = reducer.initialize(); - for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { - reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); - } - return reducer.finalizeBoth(accum, p); - } -}; - -template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct InnerMostDimPreserver { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { - eigen_assert(false && "should never be called"); - } -}; - -template <int DimIndex, typename Self, typename Op> -struct InnerMostDimPreserver<DimIndex, Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index 
firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; - InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum); - } - } -}; - -template <typename Self, typename Op> -struct InnerMostDimPreserver<0, Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { - const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; - reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum); - } - } -}; -template <typename Self, typename Op> -struct InnerMostDimPreserver<-1, Self, Op, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { - eigen_assert(false && "should never be called"); - } -}; - -// Default full reducer -template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct FullReducer { - static const bool HasOptimizedImplementation = false; - - static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { - const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); - *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer); - } -}; - - -#ifdef EIGEN_USE_THREADS -// Multithreaded full reducers -template <typename Self, typename Op, - bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)> -struct FullReducerShard { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex, - typename Self::Index numValuesToReduce, Op& reducer, - typename Self::CoeffReturnType* output) { - *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce( - self, firstIndex, numValuesToReduce, reducer); - } -}; - -// Multithreaded full reducer -template <typename Self, typename Op, bool Vectorizable> -struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> { - static const bool HasOptimizedImplementation = !Op::IsStateful; - static const int PacketSize = - unpacket_traits<typename Self::PacketReturnType>::size; - - // launch one reducer per thread and accumulate the result. - static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, - typename Self::CoeffReturnType* output) { - typedef typename Self::Index Index; - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - if (num_coeffs == 0) { - *output = reducer.finalize(reducer.initialize()); - return; - } - const TensorOpCost cost = - self.m_impl.costPerCoeff(Vectorizable) + - TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable, - PacketSize); - const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads( - num_coeffs, cost, device.numThreads()); - if (num_threads == 1) { - *output = - InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer); - return; - } - const Index blocksize = - std::floor<Index>(static_cast<float>(num_coeffs) / num_threads); - const Index numblocks = blocksize > 0 ? 
num_coeffs / blocksize : 0; - eigen_assert(num_coeffs >= numblocks * blocksize); - - Barrier barrier(internal::convert_index<unsigned int>(numblocks)); - MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize()); - for (Index i = 0; i < numblocks; ++i) { - device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run, - self, i * blocksize, blocksize, reducer, - &shards[i]); - } - typename Self::CoeffReturnType finalShard; - if (numblocks * blocksize < num_coeffs) { - finalShard = InnerMostDimReducer<Self, Op, Vectorizable>::reduce( - self, numblocks * blocksize, num_coeffs - numblocks * blocksize, - reducer); - } else { - finalShard = reducer.initialize(); - } - barrier.Wait(); - - for (Index i = 0; i < numblocks; ++i) { - reducer.reduce(shards[i], &finalShard); - } - *output = reducer.finalize(finalShard); - } -}; - -#endif - - -// Default inner reducer -template <typename Self, typename Op, typename Device> -struct InnerReducer { - static const bool HasOptimizedImplementation = false; - - EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - eigen_assert(false && "Not implemented"); - return true; - } -}; - -// Default outer reducer -template <typename Self, typename Op, typename Device> -struct OuterReducer { - static const bool HasOptimizedImplementation = false; - - EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - eigen_assert(false && "Not implemented"); - return true; - } -}; - - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -template <int B, int N, typename S, typename R, typename I> -__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); - - -#ifdef EIGEN_HAS_CUDA_FP16 -template <typename S, typename R, typename I> -__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); -template <int B, int N, typename S, typename R, typename I> -__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); -template <int NPT, typename S, typename R, typename I> -__global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*); - -#endif - -template <int NPT, typename S, typename R, typename I> -__global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); - -template <int NPT, typename S, typename R, typename I> -__global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); -#endif - -} // end namespace internal - - -template <typename Op, typename Dims, typename XprType, template <class> class MakePointer_> -class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType, MakePointer_>, ReadOnlyAccessors> { - public: - typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested; - typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) - { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - TensorReductionOp(const XprType& expr, 
const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const XprType& expression() const { return m_expr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dims& dims() const { return m_dims; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Op& reducer() const { return m_reducer; } - - protected: - typename XprType::Nested m_expr; - const Dims m_dims; - const Op m_reducer; -}; - - -// Eval as rvalue -template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> -struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> -{ - typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType; - typedef typename XprType::Index Index; - typedef ArgType ChildType; - typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; - static const int NumInputDims = internal::array_size<InputDimensions>::value; - static const int NumReducedDims = internal::array_size<Dims>::value; - static const int NumOutputDims = NumInputDims - NumReducedDims; - typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions; - typedef typename XprType::Scalar Scalar; - typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self; - static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess; - typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = Self::InputPacketAccess && Op::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value; - static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value; - static const bool RunningFullReduction = (NumOutputDims==0); - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims()) - { - EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), - YOU_MADE_A_PROGRAMMING_MISTAKE); - - // Build the bitmap indicating if an input dimension is reduced or not. - for (int i = 0; i < NumInputDims; ++i) { - m_reduced[i] = false; - } - for (int i = 0; i < NumReducedDims; ++i) { - eigen_assert(op.dims()[i] >= 0); - eigen_assert(op.dims()[i] < NumInputDims); - m_reduced[op.dims()[i]] = true; - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - internal::DimInitializer<Dimensions>::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims); - - // Precompute output strides. 
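// For ColMajor the output strides are cumulative products taken from the left
// (stride[0] = 1, stride[i] = stride[i-1] * dim[i-1]); for RowMajor they are
// taken from the right. For example, preserved dimensions {3, 5, 7} yield
// ColMajor strides {1, 3, 15} and RowMajor strides {35, 7, 1}.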
- if (NumOutputDims > 0) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - } else { - m_outputStrides.back() = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - } - } - - // Precompute input strides. - if (NumInputDims > 0) { - array<Index, NumInputDims> input_strides; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - input_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_strides[i] = input_strides[i-1] * input_dims[i-1]; - } - } else { - input_strides.back() = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; - } - } - - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (m_reduced[i]) { - m_reducedStrides[reduceIndex] = input_strides[i]; - ++reduceIndex; - } else { - m_preservedStrides[outputIndex] = input_strides[i]; - ++outputIndex; - } - } - } - - // Special case for full reductions - if (NumOutputDims == 0) { - m_preservedStrides[0] = internal::array_prod(input_dims); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) { - m_impl.evalSubExprsIfNeeded(NULL); - - // Use the FullReducer if possible. - if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction && - internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation && - ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || - !RunningOnGPU))) { - bool need_assign = false; - if (!data) { - m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType))); - data = m_result; - need_assign = true; - } - Op reducer(m_reducer); - internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data); - return need_assign; - } - else if(RunningOnSycl){ - const Index num_values_to_reduce = internal::array_prod(m_reducedDims); - const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); - m_result = data; - } - Op reducer(m_reducer); - internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); - return (m_result != NULL); - } - - // Attempt to use an optimized reduction. 
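// On a CUDA device with compute capability >= 3.0 the evaluator tries an
// InnerReducer when the reduced dimensions are the innermost (contiguous)
// ones, and an OuterReducer when the innermost dimensions are preserved; both
// may allocate a temporary buffer for the reduced result. If neither
// specialized kernel applies, coefficients are reduced one at a time through
// coeff() below.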
- else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { - bool reducing_inner_dims = true; - for (int i = 0; i < NumReducedDims; ++i) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - reducing_inner_dims &= m_reduced[i]; - } else { - reducing_inner_dims &= m_reduced[NumInputDims - 1 - i]; - } - } - if (internal::InnerReducer<Self, Op, Device>::HasOptimizedImplementation && - (reducing_inner_dims || ReducingInnerMostDims)) { - const Index num_values_to_reduce = internal::array_prod(m_reducedDims); - const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data) { - if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); - m_result = data; - } - else { - return true; - } - } - Op reducer(m_reducer); - if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { - if (m_result) { - m_device.deallocate(m_result); - m_result = NULL; - } - return true; - } else { - return (m_result != NULL); - } - } - - bool preserving_inner_dims = true; - for (int i = 0; i < NumReducedDims; ++i) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - preserving_inner_dims &= m_reduced[NumInputDims - 1 - i]; - } else { - preserving_inner_dims &= m_reduced[i]; - } - } - if (internal::OuterReducer<Self, Op, Device>::HasOptimizedImplementation && - preserving_inner_dims) { - const Index num_values_to_reduce = internal::array_prod(m_reducedDims); - const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data) { - if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); - m_result = data; - } - else { - return true; - } - } - Op reducer(m_reducer); - if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { - if (m_result) { - m_device.deallocate(m_result); - m_result = NULL; - } - return true; - } else { - return (m_result != NULL); - } - } - } - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - if (m_result) { - m_device.deallocate(m_result); - m_result = NULL; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { - return *(m_result + index); - } - Op reducer(m_reducer); - if (ReducingInnerMostDims || RunningFullReduction) { - const Index num_values_to_reduce = - (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; - return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index), - num_values_to_reduce, reducer); - } else { - typename Self::CoeffReturnType accum = reducer.initialize(); - internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum); - return reducer.finalize(accum); - } - } - - // TODO(bsteiner): provide a more efficient implementation. 
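// The packet path below mostly degenerates to PacketSize scalar reductions
// gathered into an aligned buffer. The one genuinely vectorized case is when
// the innermost non-reduced dimension is preserved and the whole packet stays
// inside it, in which case InnerMostDimPreserver reduces entire input packets
// at once.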
- template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); - - if (RunningOnGPU && m_result) { - return internal::pload<PacketReturnType>(m_result + index); - } - - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - if (ReducingInnerMostDims) { - const Index num_values_to_reduce = - (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; - const Index firstIndex = firstInput(index); - for (Index i = 0; i < PacketSize; ++i) { - Op reducer(m_reducer); - values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce, - num_values_to_reduce, reducer); - } - } else if (PreservingInnerMostDims) { - const Index firstIndex = firstInput(index); - const int innermost_dim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : NumOutputDims - 1; - // TBD: extend this the the n innermost dimensions that we preserve. - if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) { - Op reducer(m_reducer); - typename Self::PacketReturnType accum = reducer.template initializePacket<typename Self::PacketReturnType>(); - internal::InnerMostDimPreserver<NumReducedDims-1, Self, Op>::reduce(*this, firstIndex, reducer, &accum); - return reducer.finalizePacket(accum); - } else { - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index + i); - } - } - } else { - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index + i); - } - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - // Must be called after evalSubExprsIfNeeded(). 
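// When the reduction has already been materialized into m_result, producing a
// coefficient is a single load; otherwise each output coefficient costs
// roughly num_values_to_reduce evaluations of the input expression plus the
// reducer's per-value cost.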
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - if (RunningFullReduction && m_result) { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); - } else { - const Index num_values_to_reduce = internal::array_prod(m_reducedDims); - const double compute_cost = num_values_to_reduce * internal::functor_traits<Op>::Cost; - return m_impl.costPerCoeff(vectorized) * num_values_to_reduce + - TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - } - - EIGEN_DEVICE_FUNC typename MakePointer_<Scalar>::Type data() const { return m_result; } - /// required by sycl in order to extract the accessor - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - /// added for sycl in order to construct the buffer from the sycl device - const Device& device() const{return m_device;} - /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel - const Dims& xprDims() const {return m_xpr_dims;} - - - private: - template <int, typename, typename> friend struct internal::GenericDimReducer; - template <typename, typename, bool> friend struct internal::InnerMostDimReducer; - template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver; - template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer; -#ifdef EIGEN_USE_THREADS - template <typename S, typename O, bool V> friend struct internal::FullReducerShard; -#endif -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); -#ifdef EIGEN_HAS_CUDA_FP16 - template <typename S, typename R, typename I> friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); - template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); - template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); -#endif - template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); - - template <int NPT, typename S, typename R, typename I> friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); -#endif - - template <typename S, typename O, typename D> friend struct internal::InnerReducer; - - // Returns the Index in the input tensor of the first value that needs to be - // used to compute the reduction at output index "index". - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { - if (ReducingInnerMostDims) { - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - return index * m_preservedStrides[0]; - } else { - return index * m_preservedStrides[NumPreservedStrides - 1]; - } - } - // TBD: optimize the case where we preserve the innermost dimensions. - Index startInput = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumOutputDims - 1; i > 0; --i) { - // This is index_i in the output tensor. 
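// idx strips the coordinate of output dimension i off the linear index; the
// matching input offset is idx * m_preservedStrides[i], and the remainder is
// carried to the next inner dimension. The innermost coordinate is handled
// after the loop, without a multiplication when that dimension is preserved
// contiguously (stride 1).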
- const Index idx = index / m_outputStrides[i]; - startInput += idx * m_preservedStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (PreservingInnerMostDims) { - eigen_assert(m_preservedStrides[0] == 1); - startInput += index; - } else { - startInput += index * m_preservedStrides[0]; - } - } else { - for (int i = 0; i < NumOutputDims - 1; ++i) { - // This is index_i in the output tensor. - const Index idx = index / m_outputStrides[i]; - startInput += idx * m_preservedStrides[i]; - index -= idx * m_outputStrides[i]; - } - if (PreservingInnerMostDims) { - eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); - startInput += index; - } else { - startInput += index * m_preservedStrides[NumPreservedStrides - 1]; - } - } - return startInput; - } - - // Bitmap indicating if an input dimension is reduced or not. - array<bool, NumInputDims> m_reduced; - // Dimensions of the output of the operation. - Dimensions m_dimensions; - // Precomputed strides for the output tensor. - array<Index, NumOutputDims> m_outputStrides; - // Subset of strides of the input tensor for the non-reduced dimensions. - // Indexed by output dimensions. - static const int NumPreservedStrides = max_n_1<NumOutputDims>::size; - array<Index, NumPreservedStrides> m_preservedStrides; - - // Subset of strides of the input tensor for the reduced dimensions. - // Indexed by reduced dimensions. - array<Index, NumReducedDims> m_reducedStrides; - // Size of the input dimensions that are reduced. - // Indexed by reduced dimensions. - array<Index, NumReducedDims> m_reducedDims; - - // Evaluator for the input expression. - TensorEvaluator<ArgType, Device> m_impl; - - // Operation to apply for computing the reduction. - Op m_reducer; - - // For full reductions -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value; - static const bool RunningOnSycl = false; -#elif defined(EIGEN_USE_SYCL) -static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value; -static const bool RunningOnGPU = false; -#else - static const bool RunningOnGPU = false; - static const bool RunningOnSycl = false; -#endif - typename MakePointer_<CoeffReturnType>::Type m_result; - - const Device& m_device; - const Dims& m_xpr_dims; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h deleted file mode 100644 index 65638b6..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ /dev/null @@ -1,750 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H -#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H - -namespace Eigen { -namespace internal { - - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) -// Full reducers for GPU, don't vectorize for now - -// Reducer function that enables multiple cuda thread to safely accumulate at the same -// output address. 
It basically reads the current value of the output variable, and -// attempts to update it with the new value. If in the meantime another cuda thread -// updated the content of the output address it will try again. -template <typename T, typename R> -__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { -#if __CUDA_ARCH__ >= 300 - if (sizeof(T) == 4) - { - unsigned int oldval = *reinterpret_cast<unsigned int*>(output); - unsigned int newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - unsigned int readback; - while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - } - } - else if (sizeof(T) == 8) { - unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output); - unsigned long long newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - unsigned long long readback; - while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reduce(accum, reinterpret_cast<T*>(&newval)); - if (newval == oldval) { - return; - } - } - } - else { - assert(0 && "Wordsize not supported"); - } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -// We extend atomicExch to support extra data types -template <typename Type> -__device__ inline Type atomicExchCustom(Type* address, Type val) { - return atomicExch(address, val); -} - -template <> -__device__ inline double atomicExchCustom(double* address, double val) { - unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address); - return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); -} - -#ifdef EIGEN_HAS_CUDA_FP16 -template <template <typename T> class R> -__device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) { - unsigned int oldval = *reinterpret_cast<unsigned int*>(output); - unsigned int newval = oldval; - reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval)); - if (newval == oldval) { - return; - } - unsigned int readback; - while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { - oldval = readback; - newval = oldval; - reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval)); - if (newval == oldval) { - return; - } - } -} -#endif - -template <> -__device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) { -#if __CUDA_ARCH__ >= 300 - atomicAdd(output, accum); -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - - -template <typename CoeffType, typename Index> -__global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) { - const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const Index num_threads = blockDim.x * gridDim.x; - for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { - output[i] = val; - } -} - - -template <int BlockSize, int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output, unsigned int* semaphore) { -#if __CUDA_ARCH__ >= 300 - // Initialize the output value - const Index first_index = blockIdx.x * BlockSize * NumPerThread 
+ threadIdx.x; - if (gridDim.x == 1) { - if (first_index == 0) { - *output = reducer.initialize(); - } - } - else { - if (threadIdx.x == 0) { - unsigned int block = atomicCAS(semaphore, 0u, 1u); - if (block == 0) { - // We're the first block to run, initialize the output value - atomicExchCustom(output, reducer.initialize()); - __threadfence(); - atomicExch(semaphore, 2u); - } - else { - // Wait for the first block to initialize the output value. - // Use atomicCAS here to ensure that the reads aren't cached - unsigned int val; - do { - val = atomicCAS(semaphore, 2u, 2u); - } - while (val < 2u); - } - } - } - - __syncthreads(); - - eigen_assert(gridDim.x == 1 || *semaphore >= 2u); - - typename Self::CoeffReturnType accum = reducer.initialize(); - Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize); - for (Index i = 0; i < max_iter; i+=BlockSize) { - const Index index = first_index + i; - eigen_assert(index < num_coeffs); - typename Self::CoeffReturnType val = input.m_impl.coeff(index); - reducer.reduce(val, &accum); - } - -#pragma unroll - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reduce(__shfl_down(accum, offset, warpSize), &accum); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(output, accum, reducer); - } - - if (gridDim.x > 1 && threadIdx.x == 0) { - // Let the last block reset the semaphore - atomicInc(semaphore, gridDim.x + 1); - } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - - -#ifdef EIGEN_HAS_CUDA_FP16 -template <typename Self, - typename Reducer, typename Index> -__global__ void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half2* scratch) { - eigen_assert(blockDim.x == 1); - eigen_assert(gridDim.x == 1); - if (num_coeffs % 2 != 0) { - half last = input.m_impl.coeff(num_coeffs-1); - *scratch = __halves2half2(last, reducer.initialize()); - } else { - *scratch = reducer.template initializePacket<half2>(); - } -} - -template <typename Self, - typename Reducer, typename Index> -__global__ void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) { - const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; - const Index num_threads = blockDim.x * gridDim.x; - const Index num_packets = num_coeffs / 2; - for (Index i = thread_id; i < num_packets; i += num_threads) { - ((half2*)output)[i] = reducer.template initializePacket<half2>(); - } - - if (thread_id == 0 && num_coeffs % 2 != 0) { - output[num_coeffs-1] = reducer.initialize(); - } -} - -template <int BlockSize, int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, - half* output, half2* scratch) { - eigen_assert(NumPerThread % 2 == 0); - - const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x; - - // Initialize the output value if it wasn't initialized by the ReductionInitKernel - if (gridDim.x == 1 && first_index == 0) { - if (num_coeffs % 2 != 0) { - half last = input.m_impl.coeff(num_coeffs-1); - *scratch = __halves2half2(last, reducer.initialize()); - } else { - *scratch = reducer.template initializePacket<half2>(); - } - __syncthreads(); - } - - half2 accum = reducer.template initializePacket<half2>(); - const Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2); - for (Index i = 0; i < max_iter; i += BlockSize) { - const Index index = 
first_index + 2*i; - eigen_assert(index + 1 < num_coeffs); - half2 val = input.m_impl.template packet<Unaligned>(index); - reducer.reducePacket(val, &accum); - } - -#pragma unroll - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(scratch, accum, reducer); - } - - __syncthreads(); - - if (gridDim.x == 1 && first_index == 0) { - half tmp = __low2half(*scratch); - reducer.reduce(__high2half(*scratch), &tmp); - *output = tmp; - } -} - -template <typename Op> -__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) { - eigen_assert(threadIdx.x == 1); - half tmp = __low2half(*scratch); - reducer.reduce(__high2half(*scratch), &tmp); - *output = tmp; -} - -#endif - -template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> -struct FullReductionLauncher { - static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) { - assert(false && "Should only be called on doubles, floats and half floats"); - } -}; - -// Specialization for float and double -template <typename Self, typename Op, typename OutputType, bool PacketAccess> -struct FullReductionLauncher< - Self, Op, OutputType, PacketAccess, - typename internal::enable_if< - internal::is_same<float, OutputType>::value || - internal::is_same<double, OutputType>::value, - void>::type> { - static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) { - typedef typename Self::Index Index; - typedef typename Self::CoeffReturnType Scalar; - const int block_size = 256; - const int num_per_thread = 128; - const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread); - - unsigned int* semaphore = NULL; - if (num_blocks > 1) { - semaphore = device.semaphore(); - } - - LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore); - } -}; - -#ifdef EIGEN_HAS_CUDA_FP16 -template <typename Self, typename Op> -struct FullReductionLauncher<Self, Op, Eigen::half, false> { - static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) { - assert(false && "Should not be called since there is no packet accessor"); - } -}; - -template <typename Self, typename Op> -struct FullReductionLauncher<Self, Op, Eigen::half, true> { - static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) { - typedef typename Self::Index Index; - - const int block_size = 256; - const int num_per_thread = 128; - const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread); - half2* scratch = static_cast<half2*>(device.scratchpad()); - - if (num_blocks > 1) { - // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there - // won't be a race conditions between multiple thread blocks. 
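// With more than one block the sequence is: an init kernel seeds the half2
// scratch value (folding in the trailing coefficient when the count is odd),
// the main kernel accumulates partial results into that scratch via
// atomicReduce, and a cleanup kernel finally collapses the two half lanes of
// the scratch into the single half output.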
- LAUNCH_CUDA_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>), - 1, 1, 0, device, reducer, self, num_coeffs, scratch); - } - - LAUNCH_CUDA_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch); - - if (num_blocks > 1) { - LAUNCH_CUDA_KERNEL((ReductionCleanupKernelHalfFloat<Op>), - 1, 1, 0, device, reducer, output, scratch); - } - } -}; -#endif - - -template <typename Self, typename Op, bool Vectorizable> -struct FullReducer<Self, Op, GpuDevice, Vectorizable> { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple cases - // of doubles, floats and half floats -#ifdef EIGEN_HAS_CUDA_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && - (internal::is_same<typename Self::CoeffReturnType, float>::value || - internal::is_same<typename Self::CoeffReturnType, double>::value || - (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); -#else - static const bool HasOptimizedImplementation = !Op::IsStateful && - (internal::is_same<typename Self::CoeffReturnType, float>::value || - internal::is_same<typename Self::CoeffReturnType, double>::value); -#endif - - template <typename OutputType> - static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - // Don't crash when we're called with an input tensor of size 0. - if (num_coeffs == 0) { - return; - } - - FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs); - } -}; - - -template <int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, - typename Self::CoeffReturnType* output) { -#if __CUDA_ARCH__ >= 300 - typedef typename Self::CoeffReturnType Type; - eigen_assert(blockDim.y == 1); - eigen_assert(blockDim.z == 1); - eigen_assert(gridDim.y == 1); - eigen_assert(gridDim.z == 1); - - const int unroll_times = 16; - eigen_assert(NumPerThread % unroll_times == 0); - - const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread); - const Index num_input_blocks = input_col_blocks * num_preserved_coeffs; - - const Index num_threads = blockDim.x * gridDim.x; - const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; - - // Initialize the output values if they weren't initialized by the ReductionInitKernel - if (gridDim.x == 1) { - for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { - output[i] = reducer.initialize(); - } - __syncthreads(); - } - - for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { - const Index row = i / input_col_blocks; - - if (row < num_preserved_coeffs) { - const Index col_block = i % input_col_blocks; - const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x; - - Type reduced_val = reducer.initialize(); - - for (Index j = 0; j < NumPerThread; j += unroll_times) { - const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1); - if (last_col >= num_coeffs_to_reduce) { - for (Index col = col_begin + blockDim.x * 
j; col < num_coeffs_to_reduce; col += blockDim.x) { - const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col); - reducer.reduce(val, &reduced_val); - } - break; - } else { - // Faster version of the loop with no branches after unrolling. -#pragma unroll - for (int k = 0; k < unroll_times; ++k) { - const Index col = col_begin + blockDim.x * (j + k); - reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val); - } - } - } - -#pragma unroll - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val); - } - - if ((threadIdx.x & (warpSize - 1)) == 0) { - atomicReduce(&(output[row]), reduced_val, reducer); - } - } - } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif -} - -#ifdef EIGEN_HAS_CUDA_FP16 - -template <int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, - half* output) { - eigen_assert(blockDim.y == 1); - eigen_assert(blockDim.z == 1); - eigen_assert(gridDim.y == 1); - eigen_assert(gridDim.z == 1); - - const int unroll_times = 16; - eigen_assert(NumPerThread % unroll_times == 0); - eigen_assert(unroll_times % 2 == 0); - - const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2); - const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2); - - const Index num_threads = blockDim.x * gridDim.x; - const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; - - // Initialize the output values if they weren't initialized by the ReductionInitKernel - if (gridDim.x == 1) { - Index i = 2*thread_id; - for (; i + 1 < num_preserved_coeffs; i += 2*num_threads) { - half* loc = output + i; - *((half2*)loc) = reducer.template initializePacket<half2>(); - } - if (i < num_preserved_coeffs) { - output[i] = reducer.initialize(); - } - __syncthreads(); - } - - for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) { - const Index row = 2 * (i / input_col_blocks); - - if (row + 1 < num_preserved_coeffs) { - const Index col_block = i % input_col_blocks; - const Index col_begin = 2 * (col_block * blockDim.x * NumPerThread + threadIdx.x); - - half2 reduced_val1 = reducer.template initializePacket<half2>(); - half2 reduced_val2 = reducer.template initializePacket<half2>(); - - for (Index j = 0; j < NumPerThread; j += unroll_times) { - const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1) * 2; - if (last_col >= num_coeffs_to_reduce) { - Index col = col_begin + blockDim.x * j; - for (; col + 1 < num_coeffs_to_reduce; col += blockDim.x) { - const half2 val1 = input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col); - reducer.reducePacket(val1, &reduced_val1); - const half2 val2 = input.m_impl.template packet<Unaligned>((row+1) * num_coeffs_to_reduce + col); - reducer.reducePacket(val2, &reduced_val2); - } - if (col < num_coeffs_to_reduce) { - // Peel; - const half last1 = input.m_impl.coeff(row * num_coeffs_to_reduce + col); - const half2 val1 = __halves2half2(last1, reducer.initialize()); - reducer.reducePacket(val1, &reduced_val1); - const half last2 = input.m_impl.coeff((row+1) * num_coeffs_to_reduce + col); - const half2 val2 = __halves2half2(last2, reducer.initialize()); - reducer.reducePacket(val2, &reduced_val2); - } - break; - } else { - // Faster version of the loop with no branches after unrolling. 
-#pragma unroll - for (int k = 0; k < unroll_times; ++k) { - const Index col = col_begin + blockDim.x * (j + k) * 2; - reducer.reducePacket(input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col), &reduced_val1); - reducer.reducePacket(input.m_impl.template packet<Unaligned>((row + 1)* num_coeffs_to_reduce + col), &reduced_val2); - } - } - } - -#pragma unroll - for (int offset = warpSize/2; offset > 0; offset /= 2) { - reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1); - reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2); - } - - half val1 = __low2half(reduced_val1); - reducer.reduce(__high2half(reduced_val1), &val1); - half val2 = __low2half(reduced_val2); - reducer.reduce(__high2half(reduced_val2), &val2); - half2 val = __halves2half2(val1, val2); - - if ((threadIdx.x & (warpSize - 1)) == 0) { - half* loc = output + row; - atomicReduce((half2*)loc, val, reducer); - } - } - } -} - -#endif - -template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> -struct InnerReductionLauncher { - static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) { - assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device"); - return true; - } -}; - -// Specialization for float and double -template <typename Self, typename Op, typename OutputType, bool PacketAccess> -struct InnerReductionLauncher< - Self, Op, OutputType, PacketAccess, - typename internal::enable_if< - internal::is_same<float, OutputType>::value || - internal::is_same<double, OutputType>::value, - void>::type> { - static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { - typedef typename Self::Index Index; - - const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; - const int block_size = 256; - const int num_per_thread = 128; - const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - - if (num_blocks > 1) { - // We initialize the outputs outside the reduction kernel when we can't be sure that there - // won't be a race conditions between multiple thread blocks. 
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / 1024; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - LAUNCH_CUDA_KERNEL((ReductionInitKernel<OutputType, Index>), - num_blocks, 1024, 0, device, reducer.initialize(), - num_preserved_vals, output); - } - - LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); - - return false; - } -}; - -#ifdef EIGEN_HAS_CUDA_FP16 -template <typename Self, typename Op> -struct InnerReductionLauncher<Self, Op, Eigen::half, false> { - static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) { - assert(false && "Should not be called since there is no packet accessor"); - return true; - } -}; - -template <typename Self, typename Op> -struct InnerReductionLauncher<Self, Op, Eigen::half, true> { - static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { - typedef typename Self::Index Index; - - if (num_preserved_vals % 2 != 0) { - // Not supported yet, revert to the slower code path - return true; - } - - const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; - const int block_size = /*256*/128; - const int num_per_thread = /*128*/64; - const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - - if (num_blocks > 1) { - // We initialize the outputs outside the reduction kernel when we can't be sure that there - // won't be a race conditions between multiple thread blocks. - const int dyn_blocks = divup<int>(num_preserved_vals, 1024); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / 1024; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - LAUNCH_CUDA_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>), - 1, 1, 0, device, reducer, self, num_preserved_vals, output); - } - - LAUNCH_CUDA_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); - - return false; - } -}; -#endif - - -template <typename Self, typename Op> -struct InnerReducer<Self, Op, GpuDevice> { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats and half floats. 
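The launchers above all size their grid the same way: request enough blocks for every thread to process num_per_thread coefficients, then cap the count by how many blocks the device can keep resident. A small sketch of that arithmetic (the device numbers are invented stand-ins for getNumCudaMultiProcessors() and maxCudaThreadsPerMultiProcessor()):

#include <algorithm>
#include <cstdio>

// divup(a, b) == ceil(a / b), as used by the launchers above.
template <typename T>
T divup(T a, T b) { return (a + b - 1) / b; }

int main() {
  const long long num_coeffs = 1 << 24;  // hypothetical input size
  const int block_size = 256;            // threads per block (as above)
  const int num_per_thread = 128;        // coefficients per thread (as above)

  // Enough blocks to give each thread num_per_thread coefficients...
  const int dyn_blocks = static_cast<int>(
      divup<long long>(num_coeffs, 1LL * block_size * num_per_thread));
  // ...but never more than the device can keep resident (made-up device:
  // 20 SMs, 2048 threads per SM).
  const int max_blocks = 20 * 2048 / block_size;
  const int num_blocks = std::min(max_blocks, dyn_blocks);
  std::printf("dyn=%d max=%d -> launch %d blocks\n", dyn_blocks, max_blocks, num_blocks);
  return 0;
}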
-#ifdef EIGEN_HAS_CUDA_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && - (internal::is_same<typename Self::CoeffReturnType, float>::value || - internal::is_same<typename Self::CoeffReturnType, double>::value || - (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); -#else - static const bool HasOptimizedImplementation = !Op::IsStateful && - (internal::is_same<typename Self::CoeffReturnType, float>::value || - internal::is_same<typename Self::CoeffReturnType, double>::value); -#endif - - template <typename OutputType> - static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { - assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); - const Index num_coeffs = array_prod(self.m_impl.dimensions()); - // Don't crash when we're called with an input tensor of size 0. - if (num_coeffs == 0) { - return true; - } - // It's faster to use the usual code. - if (num_coeffs_to_reduce <= 128) { - return true; - } - - return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); - } -}; - -template <int NumPerThread, typename Self, - typename Reducer, typename Index> -__global__ void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, - typename Self::CoeffReturnType* output) { - const Index num_threads = blockDim.x * gridDim.x; - const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x; - // Initialize the output values if they weren't initialized by the ReductionInitKernel - if (gridDim.x == 1) { - for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) { - output[i] = reducer.initialize(); - } - __syncthreads(); - } - - // Do the reduction. - const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread); - for (Index i = thread_id; i < max_iter; i += num_threads) { - const Index input_col = i % num_preserved_coeffs; - const Index input_row = (i / num_preserved_coeffs) * NumPerThread; - typename Self::CoeffReturnType reduced_val = reducer.initialize(); - const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce); - for (Index j = input_row; j < max_row; j++) { - typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col); - reducer.reduce(val, &reduced_val); - } - atomicReduce(&(output[input_col]), reduced_val, reducer); - } -} - - -template <typename Self, typename Op> -struct OuterReducer<Self, Op, GpuDevice> { - // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats. 
- static const bool HasOptimizedImplementation = !Op::IsStateful && - (internal::is_same<typename Self::CoeffReturnType, float>::value || - internal::is_same<typename Self::CoeffReturnType, double>::value); - template <typename Device, typename OutputType> - static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) { - assert(false && "Should only be called to reduce doubles or floats on a gpu device"); - return true; - } - - static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { - typedef typename Self::Index Index; - - // It's faster to use the usual code. - if (num_coeffs_to_reduce <= 32) { - return true; - } - - const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; - const int block_size = 256; - const int num_per_thread = 16; - const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / block_size; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - - if (num_blocks > 1) { - // We initialize the outputs outside the reduction kernel when we can't be sure that there - // won't be race conditions between multiple thread blocks. - const int dyn_blocks = divup<int>(num_preserved_vals, 1024); - const int max_blocks = device.getNumCudaMultiProcessors() * - device.maxCudaThreadsPerMultiProcessor() / 1024; - const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - LAUNCH_CUDA_KERNEL((ReductionInitKernel<float, Index>), - num_blocks, 1024, 0, device, reducer.initialize(), - num_preserved_vals, output); - } - - LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>), - num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output); - - return false; - } -}; - -#endif - - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h deleted file mode 100644 index 3daecb0..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h +++ /dev/null @@ -1,242 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -/***************************************************************** - * TensorSyclPlaceHolderExpr.h - * - * \brief: - * This is the specialisation of the placeholder expression based on the - * operation type - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP - -namespace Eigen { -namespace internal { - -template<typename CoeffReturnType, typename KernelName> struct syclGenericBufferReducer{ -template<typename BufferTOut, typename BufferTIn> -static void run(BufferTOut* bufOut, BufferTIn& bufI, const Eigen::SyclDevice& dev, size_t length, size_t local){ - do { - auto f = [length, local, bufOut, &bufI](cl::sycl::handler& h) mutable { - cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)}, - cl::sycl::range<1>{std::min(length, local)}}; - /* Two accessors are used: one to the buffer that is being reduced, - * and a second to local memory, used to store intermediate data. */ - auto aI = - bufI.template get_access<cl::sycl::access::mode::read_write>(h); - auto aOut = - bufOut->template get_access<cl::sycl::access::mode::discard_write>(h); - cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, - cl::sycl::access::target::local> - scratch(cl::sycl::range<1>(local), h); - - /* The parallel_for invocation chosen is the variant with an nd_item - * parameter, since the code requires barriers for correctness. */ - h.parallel_for<KernelName>( - r, [aOut, aI, scratch, local, length](cl::sycl::nd_item<1> id) { - size_t globalid = id.get_global(0); - size_t localid = id.get_local(0); - /* All threads collectively read from global memory into local. - * The barrier ensures all threads' IO is resolved before - * execution continues (strictly speaking, all threads within - * a single work-group - there is no co-ordination between - * work-groups, only work-items). */ - if (globalid < length) { - scratch[localid] = aI[globalid]; - } - id.barrier(cl::sycl::access::fence_space::local_space); - - /* Apply the reduction operation between the current local - * id and the one on the other half of the vector. */ - if (globalid < length) { - int min = (length < local) ? length : local; - for (size_t offset = min / 2; offset > 0; offset /= 2) { - if (localid < offset) { - scratch[localid] += scratch[localid + offset]; - } - id.barrier(cl::sycl::access::fence_space::local_space); - } - /* The final result will be stored in local id 0. */ - if (localid == 0) { - aI[id.get_group(0)] = scratch[localid]; - if((length<=local) && globalid ==0){ - aOut[globalid]=scratch[localid]; - } - } - } - }); - }; - dev.m_queue.submit(f); - dev.m_queue.throw_asynchronous(); - - /* At this point, you could queue::wait_and_throw() to ensure that - * errors are caught quickly. However, this would likely impact - * performance negatively. */ - length = length / local; - - } while (length > 1); - - - -} - -}; - -/// For now let's start with a full reducer -/// Self is useless here because in expression construction we are going to treat reduction as a leafnode. -/// we want to take reduction child and then build a construction and apply the full reducer function on it. Fullreducre applies the -/// reduction operation on the child of the reduction. once it is done the reduction is an empty shell and can be thrown away and treated as -// a leafNode. 
-template <typename Self, typename Op, bool Vectorizable> -struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> { - - typedef typename Self::CoeffReturnType CoeffReturnType; - static const bool HasOptimizedImplementation = false; - - static void run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output) { - typedef const typename Self::ChildType HostExpr; /// this is the child of reduction - typedef typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr; - auto functors = TensorSycl::internal::extractFunctors(self.impl()); - int red_factor =256; /// initial reduction. If the size is less than red_factor we only creates one thread. - size_t inputSize =self.impl().dimensions().TotalSize(); - size_t rng = inputSize/red_factor; // the total number of thread initially is half the size of the input - size_t remaining = inputSize% red_factor; - if(rng ==0) { - red_factor=1; - }; - size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; - size_t GRange=std::max((size_t )1, rng); - - // convert global range to power of 2 for redecution - GRange--; - GRange |= GRange >> 1; - GRange |= GRange >> 2; - GRange |= GRange >> 4; - GRange |= GRange >> 8; - GRange |= GRange >> 16; -#if __x86_64__ || __ppc64__ || _WIN64 - GRange |= GRange >> 32; -#endif - GRange++; - size_t outTileSize = tileSize; - /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one. - if (GRange < outTileSize) outTileSize=GRange; - // getting final out buffer at the moment the created buffer is true because there is no need for assign - auto out_buffer =dev.template get_sycl_buffer<typename Eigen::internal::remove_all<CoeffReturnType>::type>(self.dimensions().TotalSize(), output); - /// creating the shared memory for calculating reduction. - /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can - /// recursively apply reduction on it in order to reduce the whole. - auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange)); - typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims; - Dims dims= self.xprDims(); - Op functor = reducer; - dev.m_queue.submit([&](cl::sycl::handler &cgh) { - // create a tuple of accessors from Evaluator - auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - auto tmp_global_accessor = temp_global_buffer. template get_access<cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>(cgh); - - cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) { - typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; - auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); - /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the - /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. 
The former is created here. - const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); - /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is - /// the device_evaluator is detectable and recognisable on the device. - auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice()); - /// const cast added as a naive solution to solve the qualifier drop error - auto globalid=itemID.get_global_linear_id(); - - if(globalid<rng) - tmp_global_accessor.get_pointer()[globalid]=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*globalid, red_factor, const_cast<Op&>(functor)); - else - tmp_global_accessor.get_pointer()[globalid]=static_cast<CoeffReturnType>(0); - - if(remaining!=0 && globalid==0 ) - // this will add the rest of input buffer when the input size is not devidable to red_factor. - tmp_global_accessor.get_pointer()[globalid]+=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*(rng), remaining, const_cast<Op&>(functor)); - }); - }); - dev.m_queue.throw_asynchronous(); - -/// This is used to recursively reduce the tmp value to an element of 1; - syclGenericBufferReducer<CoeffReturnType,HostExpr>::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize); - } - -}; - -template <typename Self, typename Op> -struct InnerReducer<Self, Op, const Eigen::SyclDevice> { - - typedef typename Self::CoeffReturnType CoeffReturnType; - static const bool HasOptimizedImplementation = false; - - static bool run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output, typename Self::Index , typename Self::Index num_coeffs_to_preserve) { - typedef const typename Self::ChildType HostExpr; /// this is the child of reduction - typedef typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr; - auto functors = TensorSycl::internal::extractFunctors(self.impl()); - - size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; - - size_t GRange=num_coeffs_to_preserve; - if (tileSize>GRange) tileSize=GRange; - else if(GRange>tileSize){ - size_t xMode = GRange % tileSize; - if (xMode != 0) GRange += (tileSize - xMode); - } - // getting final out buffer at the moment the created buffer is true because there is no need for assign - /// creating the shared memory for calculating reduction. - /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can - /// recursively apply reduction on it in order to reduce the whole. 
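Because there is no device-wide barrier, the SYCL path above reduces in two stages: every work-item first collapses a fixed-size slice of the input into a temporary global buffer, and that buffer is then reduced pass by pass (the do/while loop of syclGenericBufferReducer) until a single value remains. A rough, serial, host-side sketch of that structure in plain C++, for a sum; red_factor and local correspond to the identically named quantities above, and this is an illustration rather than the SYCL implementation:

#include <algorithm>
#include <numeric>
#include <vector>

// Stage 1: one partial per "work-item"; stage 2: repeated group-sized passes.
// Assumes local >= 2 so that the second stage terminates.
float two_stage_sum_sketch(const std::vector<float>& input,
                           std::size_t red_factor, std::size_t local) {
  std::size_t length = std::max<std::size_t>(1, input.size() / red_factor);
  std::vector<float> tmp(length, 0.0f);

  // Stage 1: each slot reduces a contiguous slice of red_factor coefficients
  // (the last slot also absorbs the remainder).
  for (std::size_t g = 0; g < length; ++g) {
    std::size_t begin = g * red_factor;
    std::size_t end = (g + 1 == length) ? input.size() : begin + red_factor;
    tmp[g] = std::accumulate(input.begin() + begin, input.begin() + end, 0.0f);
  }

  // Stage 2: every pass folds groups of `local` partials into one partial,
  // mirroring the host-side do/while loop of syclGenericBufferReducer above.
  while (length > 1) {
    std::size_t groups = (length + local - 1) / local;
    for (std::size_t g = 0; g < groups; ++g) {
      std::size_t begin = g * local;
      std::size_t end = std::min(length, begin + local);
      tmp[g] = std::accumulate(tmp.begin() + begin, tmp.begin() + end, 0.0f);
    }
    length = groups;
  }
  return tmp[0];
}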
- typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims; - Dims dims= self.xprDims(); - Op functor = reducer; - - dev.m_queue.submit([&](cl::sycl::handler &cgh) { - // create a tuple of accessors from Evaluator - auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl()); - auto output_accessor = dev.template get_sycl_accessor<cl::sycl::access::mode::discard_write>(num_coeffs_to_preserve,cgh, output); - - cgh.parallel_for<Self>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { - typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; - auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); - /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour - /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the - /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. - const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); - /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is - /// the device_evaluator is detectable and recognisable on the device. - typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeiceSelf; - auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice()); - /// const cast added as a naive solution to solve the qualifier drop error - auto globalid=itemID.get_global_linear_id(); - if (globalid< static_cast<size_t>(num_coeffs_to_preserve)) { - typename DeiceSelf::CoeffReturnType accum = functor.initialize(); - GenericDimReducer<DeiceSelf::NumReducedDims-1, DeiceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(globalid),const_cast<Op&>(functor), &accum); - functor.finalize(accum); - output_accessor.get_pointer()[globalid]= accum; - } - }); - }); - dev.m_queue.throw_asynchronous(); - return false; - } -}; - -} // end namespace internal -} // namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h deleted file mode 100644 index 99245f7..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h +++ /dev/null @@ -1,429 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H -#define EIGEN_CXX11_TENSOR_TENSOR_REF_H - -namespace Eigen { - -namespace internal { - -template <typename Dimensions, typename Scalar> -class TensorLazyBaseEvaluator { - public: - TensorLazyBaseEvaluator() : m_refcount(0) { } - virtual ~TensorLazyBaseEvaluator() { } - - EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0; - EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0; - - EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0; - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0; - - void incrRefCount() { ++m_refcount; } - void decrRefCount() { --m_refcount; } - int refCount() const { return m_refcount; } - - private: - // No copy, no assigment; - TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); - TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); - - int m_refcount; -}; - - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, typename TensorEvaluator<Expr, Device>::Scalar> { - public: - // typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions; - typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar; - - TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { - m_dims = m_impl.dimensions(); - m_impl.evalSubExprsIfNeeded(NULL); - } - virtual ~TensorLazyEvaluatorReadOnly() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const { - return m_dims; - } - EIGEN_DEVICE_FUNC virtual const Scalar* data() const { - return m_impl.data(); - } - - EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const { - return m_impl.coeff(index); - } - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) { - eigen_assert(false && "can't reference the coefficient of a rvalue"); - return m_dummy; - }; - - protected: - TensorEvaluator<Expr, Device> m_impl; - Dimensions m_dims; - Scalar m_dummy; -}; - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> { - public: - typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base; - typedef typename Base::Scalar Scalar; - - TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) { - } - virtual ~TensorLazyEvaluatorWritable() { - } - - EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) { - return this->m_impl.coeffRef(index); - } -}; - -template <typename Dimensions, typename Expr, typename Device> -class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type { - public: - typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value), - TensorLazyEvaluatorWritable<Dimensions, Expr, Device>, - TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base; - typedef typename Base::Scalar Scalar; - - TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) { - } - virtual ~TensorLazyEvaluator() { - } -}; - -} // namespace internal - - -/** \class TensorRef - * \ingroup CXX11_Tensor_Module - * - * \brief A reference to a tensor expression - * The expression will be evaluated lazily (as much as possible). 
- * - */ -template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef<PlainObjectType> > -{ - public: - typedef TensorRef<PlainObjectType> Self; - typedef typename PlainObjectType::Base Base; - typedef typename Eigen::internal::nested<Self>::type Nested; - typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; - typedef typename internal::traits<PlainObjectType>::Index Index; - typedef typename internal::traits<PlainObjectType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename Base::CoeffReturnType CoeffReturnType; - typedef Scalar* PointerType; - typedef PointerType PointerArgType; - - static const Index NumIndices = PlainObjectType::NumIndices; - typedef typename PlainObjectType::Dimensions Dimensions; - - enum { - IsAligned = false, - PacketAccess = false, - Layout = PlainObjectType::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) { - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) { - m_evaluator->incrRefCount(); - } - - template <typename Expression> - EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) { - unrefEvaluator(); - m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice()); - m_evaluator->incrRefCount(); - return *this; - } - - ~TensorRef() { - unrefEvaluator(); - } - - TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) { - eigen_assert(m_evaluator->refCount() > 0); - m_evaluator->incrRefCount(); - } - - TensorRef& operator = (const TensorRef& other) { - if (this != &other) { - unrefEvaluator(); - m_evaluator = other.m_evaluator; - eigen_assert(m_evaluator->refCount() > 0); - m_evaluator->incrRefCount(); - } - return *this; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index index) const - { - return m_evaluator->coeff(index); - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const - { - const std::size_t num_indices = (sizeof...(otherIndices) + 1); - const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; - return coeff(indices); - } - template<typename... IndexTypes> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... 
otherIndices) - { - const std::size_t num_indices = (sizeof...(otherIndices) + 1); - const array<Index, num_indices> indices{{firstIndex, otherIndices...}}; - return coeffRef(indices); - } -#else - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const - { - array<Index, 2> indices; - indices[0] = i0; - indices[1] = i1; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const - { - array<Index, 3> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const - { - array<Index, 4> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const - { - array<Index, 5> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - indices[4] = i4; - return coeff(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1) - { - array<Index, 2> indices; - indices[0] = i0; - indices[1] = i1; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2) - { - array<Index, 3> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) - { - array<Index, 4> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - return coeffRef(indices); - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4) - { - array<Index, 5> indices; - indices[0] = i0; - indices[1] = i1; - indices[2] = i2; - indices[3] = i3; - indices[4] = i4; - return coeffRef(indices); - } -#endif - - template <std::size_t NumIndices> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const - { - const Dimensions& dims = this->dimensions(); - Index index = 0; - if (PlainObjectType::Options & RowMajor) { - index += indices[0]; - for (size_t i = 1; i < NumIndices; ++i) { - index = index * dims[i] + indices[i]; - } - } else { - index += indices[NumIndices-1]; - for (int i = NumIndices-2; i >= 0; --i) { - index = index * dims[i] + indices[i]; - } - } - return m_evaluator->coeff(index); - } - template <std::size_t NumIndices> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices) - { - const Dimensions& dims = this->dimensions(); - Index index = 0; - if (PlainObjectType::Options & RowMajor) { - index += indices[0]; - for (size_t i = 1; i < NumIndices; ++i) { - index = index * dims[i] + indices[i]; - } - } else { - index += indices[NumIndices-1]; - for (int i = NumIndices-2; i >= 0; --i) { - index = index * dims[i] + indices[i]; - } - } - return m_evaluator->coeffRef(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return m_evaluator->coeff(index); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return m_evaluator->coeffRef(index); - } - - private: - EIGEN_STRONG_INLINE void unrefEvaluator() { - if (m_evaluator) { - m_evaluator->decrRefCount(); - if (m_evaluator->refCount() == 0) { - delete m_evaluator; - } - } - } - 
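For context, a hypothetical usage sketch of the class being assembled here: binding an expression to a TensorRef wraps it in the reference-counted lazy evaluator above, and for a simple element-wise expression the coefficients are then computed on demand as they are read rather than materialized up front (names and sizes below are illustrative only):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> a(64, 64), b(64, 64);
  a.setRandom();
  b.setRandom();

  // No temporary tensor is created for the expression; TensorRef holds a
  // TensorLazyEvaluator and bumps its reference count, as shown above.
  Eigen::TensorRef<Eigen::Tensor<float, 2> > ref = a * a + b;

  // Reading a coefficient goes through the evaluator's coeff(); only what is
  // actually accessed gets computed (and repeated reads recompute it).
  std::cout << ref(3, 5) << " rank=" << ref.rank() << "\n";
  return 0;
}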
- internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator; -}; - - -// evaluator for rvalues -template<typename Derived, typename Device> -struct TensorEvaluator<const TensorRef<Derived>, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - enum { - IsAligned = false, - PacketAccess = false, - Layout = TensorRef<Derived>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&) - : m_ref(m) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_ref.coeff(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_ref.coeffRef(index); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return m_ref.data(); } - - protected: - TensorRef<Derived> m_ref; -}; - - -// evaluator for lvalues -template<typename Derived, typename Device> -struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<const TensorRef<Derived>, Device> -{ - typedef typename Derived::Index Index; - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Scalar CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef typename Derived::Dimensions Dimensions; - - typedef TensorEvaluator<const TensorRef<Derived>, Device> Base; - - enum { - IsAligned = false, - PacketAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return this->m_ref.coeffRef(index); - } -}; - - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h deleted file mode 100644 index 14e392e..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ /dev/null @@ -1,288 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H -#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H -namespace Eigen { - -/** \class TensorReverse - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor reverse elements class. 
- * - */ -namespace internal { -template<typename ReverseDimensions, typename XprType> -struct traits<TensorReverseOp<ReverseDimensions, - XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename ReverseDimensions, typename XprType> -struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense> -{ - typedef const TensorReverseOp<ReverseDimensions, XprType>& type; -}; - -template<typename ReverseDimensions, typename XprType> -struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1, - typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type> -{ - typedef TensorReverseOp<ReverseDimensions, XprType> type; -}; - -} // end namespace internal - -template<typename ReverseDimensions, typename XprType> -class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions, - XprType>, WriteAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested; - typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind - StorageKind; - typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp( - const XprType& expr, const ReverseDimensions& reverse_dims) - : m_xpr(expr), m_reverse_dims(reverse_dims) { } - - EIGEN_DEVICE_FUNC - const ReverseDimensions& reverse() const { return m_reverse_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other) - { - typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const ReverseDimensions m_reverse_dims; -}; - -// Eval as rvalue -template<typename ReverseDimensions, typename ArgType, typename Device> -struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device> -{ - typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<ReverseDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = 
TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, - const Device& device) - : m_impl(op.expression(), device), m_reverse(op.reverse()) - { - // Reversing a scalar isn't supported yet. It would be a no-op anyway. - EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - // Compute strides - m_dimensions = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_strides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_strides[i] = m_strides[i-1] * m_dimensions[i-1]; - } - } else { - m_strides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_strides[i] = m_strides[i+1] * m_dimensions[i+1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex( - Index index) const { - eigen_assert(index < dimensions().TotalSize()); - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - if (m_reverse[i]) { - idx = m_dimensions[i] - idx - 1; - } - inputIndex += idx * m_strides[i] ; - } - if (m_reverse[0]) { - inputIndex += (m_dimensions[0] - index - 1); - } else { - inputIndex += index; - } - } else { - for (int i = 0; i < NumDims - 1; ++i) { - Index idx = index / m_strides[i]; - index -= idx * m_strides[i]; - if (m_reverse[i]) { - idx = m_dimensions[i] - idx - 1; - } - inputIndex += idx * m_strides[i] ; - } - if (m_reverse[NumDims-1]) { - inputIndex += (m_dimensions[NumDims-1] - index - 1); - } else { - inputIndex += index; - } - } - return inputIndex; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff( - Index index) const { - return m_impl.coeff(reverseIndex(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - // TODO(ndjaitly): write a better packing routine that uses - // local structure. 
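The coordinate flip performed by reverseIndex() above can be stated compactly: for every reversed dimension the coordinate i becomes dim - 1 - i, and the flipped coordinates are re-linearized with the unchanged strides. A stand-alone illustration for a column-major 2-D tensor (not Eigen code, purely to make the mapping concrete):

#include <array>
#include <cstdio>

// Maps an output linear index to the input linear index for a 2-D
// column-major tensor, reversing the dimensions flagged in `reverse`.
int reversed_linear_index(int index, std::array<int, 2> dims,
                          std::array<bool, 2> reverse) {
  const std::array<int, 2> strides = {1, dims[0]};
  int input = 0;
  int idx1 = index / strides[1];      // outer coordinate
  index -= idx1 * strides[1];
  if (reverse[1]) idx1 = dims[1] - idx1 - 1;
  input += idx1 * strides[1];
  int idx0 = index;                   // remaining inner coordinate
  if (reverse[0]) idx0 = dims[0] - idx0 - 1;
  input += idx0;
  return input;
}

int main() {
  // A 3x4 tensor reversed along dimension 0: output (0,1) reads input (2,1),
  // i.e. linear index 3 maps to linear index 5.
  std::printf("%d\n", reversed_linear_index(3, {3, 4}, {true, false}));
  return 0;
}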
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type - values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + - 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()); - for (int i = 0; i < NumDims; ++i) { - if (m_reverse[i]) { - compute_cost += 2 * TensorOpCost::AddCost<Index>(); - } - } - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - Dimensions m_dimensions; - array<Index, NumDims> m_strides; - TensorEvaluator<ArgType, Device> m_impl; - ReverseDimensions m_reverse; -}; - -// Eval as lvalue - -template <typename ReverseDimensions, typename ArgType, typename Device> -struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device> - : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, - Device> { - typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, - Device> Base; - typedef TensorReverseOp<ReverseDimensions, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<ReverseDimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, - const Device& device) - : Base(op, device) {} - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Dimensions& dimensions() const { return this->m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return this->m_impl.coeffRef(this->reverseIndex(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - // This code is pilfered from TensorMorphing.h - EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < PacketSize; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h deleted file mode 100644 index 8501466..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ /dev/null @@ -1,287 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H -#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H - -namespace Eigen { - -namespace internal { - -template <typename Op, typename XprType> -struct traits<TensorScanOp<Op, XprType> > - : public traits<XprType> { - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Op, typename XprType> -struct eval<TensorScanOp<Op, XprType>, Eigen::Dense> -{ - typedef const TensorScanOp<Op, XprType>& type; -}; - -template<typename Op, typename XprType> -struct nested<TensorScanOp<Op, XprType>, 1, - typename eval<TensorScanOp<Op, XprType> >::type> -{ - typedef TensorScanOp<Op, XprType> type; -}; -} // end namespace internal - -/** \class TensorScan - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor scan class. - */ -template <typename Op, typename XprType> -class TensorScanOp - : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> { -public: - typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorScanOp>::type Nested; - typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorScanOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( - const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) - : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Index axis() const { return m_axis; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const XprType& expression() const { return m_expr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Op accumulator() const { return m_accumulator; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - bool exclusive() const { return m_exclusive; } - -protected: - typename XprType::Nested m_expr; - const Index m_axis; - const Op m_accumulator; - const bool m_exclusive; -}; - -template <typename Self, typename Reducer, typename Device> -struct ScanLauncher; - -// Eval as rvalue -template <typename Op, typename ArgType, typename Device> -struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> { - - typedef TensorScanOp<Op, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self; - - enum { - IsAligned = false, - PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1), - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = true - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, - const Device& 
device) - : m_impl(op.expression(), device), - m_device(device), - m_exclusive(op.exclusive()), - m_accumulator(op.accumulator()), - m_size(m_impl.dimensions()[op.axis()]), - m_stride(1), - m_output(NULL) { - - // Accumulating a scalar isn't supported. - EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - eigen_assert(op.axis() >= 0 && op.axis() < NumDims); - - // Compute stride of scan axis - const Dimensions& dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = 0; i < op.axis(); ++i) { - m_stride = m_stride * dims[i]; - } - } else { - for (int i = NumDims - 1; i > op.axis(); --i) { - m_stride = m_stride * dims[i]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_impl.dimensions(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const { - return m_stride; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const { - return m_size; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const { - return m_accumulator; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const { - return m_exclusive; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const { - return m_impl; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { - return m_device; - } - - EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - m_impl.evalSubExprsIfNeeded(NULL); - ScanLauncher<Self, Op, Device> launcher; - if (data) { - launcher(*this, data); - return false; - } - - const Index total_size = internal::array_prod(dimensions()); - m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar))); - launcher(*this, m_output); - return true; - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { - return internal::ploadt<PacketReturnType, LoadMode>(m_output + index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const - { - return m_output; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_output[index]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { - return TensorOpCost(sizeof(CoeffReturnType), 0, 0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - if (m_output != NULL) { - m_device.deallocate(m_output); - m_output = NULL; - } - m_impl.cleanup(); - } - -protected: - TensorEvaluator<ArgType, Device> m_impl; - const Device& m_device; - const bool m_exclusive; - Op m_accumulator; - const Index m_size; - Index m_stride; - CoeffReturnType* m_output; -}; - -// CPU implementation of scan -// TODO(ibab) This single-threaded implementation should be parallelized, -// at least by running multiple scans at the same time. -template <typename Self, typename Reducer, typename Device> -struct ScanLauncher { - void operator()(Self& self, typename Self::CoeffReturnType *data) { - Index total_size = internal::array_prod(self.dimensions()); - - // We fix the index along the scan axis to 0 and perform a - // scan per remaining entry. The iteration is split into two nested - // loops to avoid an integer division by keeping track of each idx1 and idx2. 
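For orientation, the inclusive/exclusive ordering implemented by the loop below can be reproduced in a few lines of plain C++, independent of the evaluator machinery (a minimal sketch; the prefix_sum name and the use of double with operator+ merely stand in for the generic accumulator):

#include <cstddef>
#include <vector>

// Inclusive scan: write the running total after folding in the current element.
// Exclusive scan: write the total accumulated so far before folding it in.
std::vector<double> prefix_sum(const std::vector<double>& in, bool exclusive) {
  std::vector<double> out(in.size());
  double accum = 0.0;                    // plays the role of accumulator().initialize()
  for (std::size_t i = 0; i < in.size(); ++i) {
    if (exclusive) {
      out[i] = accum;                    // finalize() before reduce(), as in the branch below
      accum += in[i];                    // reduce()
    } else {
      accum += in[i];                    // reduce()
      out[i] = accum;                    // finalize() after reduce()
    }
  }
  return out;
}
// prefix_sum({1, 2, 3}, false) yields {1, 3, 6}; prefix_sum({1, 2, 3}, true) yields {0, 1, 3}.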
- for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) { - for (Index idx2 = 0; idx2 < self.stride(); idx2++) { - // Calculate the starting offset for the scan - Index offset = idx1 + idx2; - - // Compute the scan along the axis, starting at the calculated offset - typename Self::CoeffReturnType accum = self.accumulator().initialize(); - for (Index idx3 = 0; idx3 < self.size(); idx3++) { - Index curr = offset + idx3 * self.stride(); - - if (self.exclusive()) { - data[curr] = self.accumulator().finalize(accum); - self.accumulator().reduce(self.inner().coeff(curr), &accum); - } else { - self.accumulator().reduce(self.inner().coeff(curr), &accum); - data[curr] = self.accumulator().finalize(accum); - } - } - } - } - } -}; - -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - -// GPU implementation of scan -// TODO(ibab) This placeholder implementation performs multiple scans in -// parallel, but it would be better to use a parallel scan algorithm and -// optimize memory access. -template <typename Self, typename Reducer> -__global__ void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) { - // Compute offset as in the CPU version - Index val = threadIdx.x + blockIdx.x * blockDim.x; - Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride(); - - if (offset + (self.size() - 1) * self.stride() < total_size) { - // Compute the scan along the axis, starting at the calculated offset - typename Self::CoeffReturnType accum = self.accumulator().initialize(); - for (Index idx = 0; idx < self.size(); idx++) { - Index curr = offset + idx * self.stride(); - if (self.exclusive()) { - data[curr] = self.accumulator().finalize(accum); - self.accumulator().reduce(self.inner().coeff(curr), &accum); - } else { - self.accumulator().reduce(self.inner().coeff(curr), &accum); - data[curr] = self.accumulator().finalize(accum); - } - } - } - __syncthreads(); - -} - -template <typename Self, typename Reducer> -struct ScanLauncher<Self, Reducer, GpuDevice> { - void operator()(const Self& self, typename Self::CoeffReturnType* data) { - Index total_size = internal::array_prod(self.dimensions()); - Index num_blocks = (total_size / self.size() + 63) / 64; - Index block_size = 64; - LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data); - } -}; -#endif // EIGEN_USE_GPU && __CUDACC__ - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h deleted file mode 100644 index 113c060..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ /dev/null @@ -1,264 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H -#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H - -namespace Eigen { - -/** \class TensorShuffling - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor shuffling class. 
- * - * - */ -namespace internal { -template<typename Shuffle, typename XprType> -struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Shuffle, typename XprType> -struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense> -{ - typedef const TensorShufflingOp<Shuffle, XprType>& type; -}; - -template<typename Shuffle, typename XprType> -struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type> -{ - typedef TensorShufflingOp<Shuffle, XprType> type; -}; - -} // end namespace internal - - - -template<typename Shuffle, typename XprType> -class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle) - : m_xpr(expr), m_shuffle(shuffle) {} - - EIGEN_DEVICE_FUNC - const Shuffle& shufflePermutation() const { return m_shuffle; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const TensorShufflingOp& other) - { - typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested m_xpr; - const Shuffle m_shuffle; -}; - - -// Eval as rvalue -template<typename Shuffle, typename ArgType, typename Device> -struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> -{ - typedef TensorShufflingOp<Shuffle, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to 
be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - const Shuffle& shuffle = op.shufflePermutation(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = input_dims[shuffle[i]]; - } - - array<Index, NumDims> inputStrides; - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - inputStrides[0] = 1; - m_outputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - inputStrides[i] = inputStrides[i - 1] * input_dims[i - 1]; - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } - } else { - inputStrides[NumDims - 1] = 1; - m_outputStrides[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } - } - - for (int i = 0; i < NumDims; ++i) { - m_inputStrides[i] = inputStrides[shuffle[i]]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() + - 2 * TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()); - return m_impl.costPerCoeff(vectorized) + - TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - return inputIndex + index * m_inputStrides[0]; - } else { - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - return inputIndex + index * m_inputStrides[NumDims - 1]; - } - } - - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; -}; - - -// Eval as lvalue -template<typename Shuffle, typename ArgType, typename Device> -struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device> - : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> -{ - typedef 
TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base; - - typedef TensorShufflingOp<Shuffle, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = (internal::packet_traits<Scalar>::size > 1), - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) - { } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - internal::pstore<CoeffReturnType, PacketReturnType>(values, x); - for (int i = 0; i < PacketSize; ++i) { - this->coeffRef(index+i) = values[i]; - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h deleted file mode 100644 index e6a666f..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ /dev/null @@ -1,146 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H -#define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H - -#ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN - #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN; -#else - #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN -#endif - -namespace Eigen { - -/** \internal - * - * \class TensorStorage - * \ingroup CXX11_Tensor_Module - * - * \brief Stores the data of a tensor - * - * This class stores the data of fixed-size, dynamic-size or mixed tensors - * in a way as compact as possible. - * - * \sa Tensor - */ -template<typename T, typename Dimensions, int Options> class TensorStorage; - - -// Pure fixed-size storage -template<typename T, typename FixedDimensions, int Options_> -class TensorStorage -{ - private: - static const std::size_t Size = FixedDimensions::total_size; - - // Allocate an array of size at least one to prevent compiler warnings. 
- static const std::size_t MinSize = max_n_1<Size>::size; - EIGEN_ALIGN_MAX T m_data[MinSize]; - - FixedDimensions m_dimensions; - - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStorage() { - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T *data() { return m_data; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T *data() const { return m_data; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const FixedDimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); } -}; - - -// pure dynamic -template<typename T, typename IndexType, int NumIndices_, int Options_> -class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> -{ - public: - typedef IndexType Index; - typedef DSizes<IndexType, NumIndices_> Dimensions; - typedef TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> Self; - - EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() { - if (NumIndices_ == 0) { - m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1); - } - } - EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert) - : m_data(0), m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {} - EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions) - : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions) - { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - template <typename... DenseIndex> - EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) { - m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(m_dimensions)); - } -#endif - - EIGEN_DEVICE_FUNC TensorStorage(const Self& other) - : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions))) - , m_dimensions(other.m_dimensions) - { - internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data); - } - EIGEN_DEVICE_FUNC Self& operator=(const Self& other) - { - if (this != &other) { - Self tmp(other); - this->swap(tmp); - } - return *this; - } - - EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); } - EIGEN_DEVICE_FUNC void swap(Self& other) - { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;} - - EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions) - { - const Index currentSz = internal::array_prod(m_dimensions); - if(size != currentSz) - { - internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz); - if (size) - m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size); - else if (NumIndices_ == 0) { - m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1); - } - else - m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - } - m_dimensions = nbDimensions; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } - - private: - T *m_data; - Dimensions m_dimensions; -}; 
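At the API level, the two storage specialisations above are what back the fixed-size and dynamic tensor types respectively. A hedged usage sketch (it assumes the public header unsupported/Eigen/CXX11/Tensor and the TensorFixedSize and Tensor class templates from that module):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // Compile-time dimensions: the fixed-size TensorStorage specialisation above
  // keeps the coefficients in an in-object, aligned array.
  Eigen::TensorFixedSize<float, Eigen::Sizes<2, 3> > fixed;
  fixed.setZero();

  // Run-time dimensions: the DSizes-based specialisation above allocates on the
  // heap and reallocates through TensorStorage::resize.
  Eigen::Tensor<float, 2> dynamic(2, 3);
  dynamic.setZero();
  dynamic.resize(4, 5);
  return 0;
}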
- -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h deleted file mode 100644 index 6c35bfd..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ /dev/null @@ -1,338 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H -#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H - -namespace Eigen { - -/** \class TensorStriding - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor striding class. - * - * - */ -namespace internal { -template<typename Strides, typename XprType> -struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType> -{ - typedef typename XprType::Scalar Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions; - static const int Layout = XprTraits::Layout; -}; - -template<typename Strides, typename XprType> -struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense> -{ - typedef const TensorStridingOp<Strides, XprType>& type; -}; - -template<typename Strides, typename XprType> -struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type> -{ - typedef TensorStridingOp<Strides, XprType> type; -}; - -} // end namespace internal - - - -template<typename Strides, typename XprType> -class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> > -{ - public: - typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested; - typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims) - : m_xpr(expr), m_dims(dims) {} - - EIGEN_DEVICE_FUNC - const Strides& strides() const { return m_dims; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other) - { - typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - template<typename OtherDerived> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other) - { - typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign; - Assign assign(*this, other); - internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice()); - return *this; - } - - protected: - typename XprType::Nested 
m_xpr; - const Strides m_dims; -}; - - -// Eval as rvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> -{ - typedef TensorStridingOp<Strides, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - m_dimensions = m_impl.dimensions(); - for (int i = 0; i < NumDims; ++i) { - m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]); - } - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_outputStrides[0] = 1; - m_inputStrides[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; - m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; - m_inputStrides[i-1] *= op.strides()[i-1]; - } - m_inputStrides[NumDims-1] *= op.strides()[NumDims-1]; - } else { // RowMajor - m_outputStrides[NumDims-1] = 1; - m_inputStrides[NumDims-1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; - m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; - m_inputStrides[i+1] *= op.strides()[i+1]; - } - m_inputStrides[0] *= op.strides()[0]; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return m_impl.coeff(srcCoeff(index)); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + PacketSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / m_outputStrides[i]; - const Index idx1 = indices[1] / m_outputStrides[i]; - inputIndices[0] += idx0 * m_inputStrides[i]; - inputIndices[1] += idx1 * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += indices[0] * m_inputStrides[0]; - inputIndices[1] += indices[1] * m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / m_outputStrides[i]; - const Index 
idx1 = indices[1] / m_outputStrides[i]; - inputIndices[0] += idx0 * m_inputStrides[i]; - inputIndices[1] += idx1 * m_inputStrides[i]; - indices[0] -= idx0 * m_outputStrides[i]; - indices[1] -= idx1 * m_outputStrides[i]; - } - inputIndices[0] += indices[0] * m_inputStrides[NumDims-1]; - inputIndices[1] += indices[1] * m_inputStrides[NumDims-1]; - } - if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { - PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); - return rslt; - } - else { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - values[0] = m_impl.coeff(inputIndices[0]); - values[PacketSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < PacketSize-1; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { - double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() + - TensorOpCost::MulCost<Index>() + - TensorOpCost::DivCost<Index>()) + - TensorOpCost::MulCost<Index>(); - if (vectorized) { - compute_cost *= 2; // packet() computes two indices - } - const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1); - return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) + - // Computation is not vectorized per se, but it is done once per packet. - TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const - { - Index inputIndex = 0; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += index * m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx = index / m_outputStrides[i]; - inputIndex += idx * m_inputStrides[i]; - index -= idx * m_outputStrides[i]; - } - inputIndex += index * m_inputStrides[NumDims-1]; - } - return inputIndex; - } - - Dimensions m_dimensions; - array<Index, NumDims> m_outputStrides; - array<Index, NumDims> m_inputStrides; - TensorEvaluator<ArgType, Device> m_impl; -}; - - -// Eval as lvalue -template<typename Strides, typename ArgType, typename Device> -struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device> - : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device> -{ - typedef TensorStridingOp<Strides, ArgType> XprType; - typedef TensorEvaluator<const XprType, Device> Base; - // typedef typename XprType::Index Index; - static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - // typedef DSizes<Index, NumDims> Dimensions; - - enum { - IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, // to be implemented - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : Base(op, device) { } - - typedef typename XprType::Index Index; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename 
PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return this->m_impl.coeffRef(this->srcCoeff(index)); - } - - template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketReturnType& x) - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize()); - - Index inputIndices[] = {0, 0}; - Index indices[] = {index, index + PacketSize - 1}; - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - for (int i = NumDims - 1; i > 0; --i) { - const Index idx0 = indices[0] / this->m_outputStrides[i]; - const Index idx1 = indices[1] / this->m_outputStrides[i]; - inputIndices[0] += idx0 * this->m_inputStrides[i]; - inputIndices[1] += idx1 * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += indices[0] * this->m_inputStrides[0]; - inputIndices[1] += indices[1] * this->m_inputStrides[0]; - } else { // RowMajor - for (int i = 0; i < NumDims - 1; ++i) { - const Index idx0 = indices[0] / this->m_outputStrides[i]; - const Index idx1 = indices[1] / this->m_outputStrides[i]; - inputIndices[0] += idx0 * this->m_inputStrides[i]; - inputIndices[1] += idx1 * this->m_inputStrides[i]; - indices[0] -= idx0 * this->m_outputStrides[i]; - indices[1] -= idx1 * this->m_outputStrides[i]; - } - inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1]; - inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1]; - } - if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { - this->m_impl.template writePacket<Unaligned>(inputIndices[0], x); - } - else { - EIGEN_ALIGN_MAX Scalar values[PacketSize]; - internal::pstore<Scalar, PacketReturnType>(values, x); - this->m_impl.coeffRef(inputIndices[0]) = values[0]; - this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1]; - for (int i = 1; i < PacketSize-1; ++i) { - this->coeffRef(index+i) = values[i]; - } - } - } -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h deleted file mode 100644 index bb8800d..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h +++ /dev/null @@ -1,82 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: eigen@codeplay.com -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
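Both the shuffling and the striding evaluators above convert an output coefficient index into an input index with the same two-step trick: divide by the output strides to peel off one coordinate at a time, then recombine the coordinates with the input strides. A self-contained sketch of the column-major case (the names and the std::ptrdiff_t index type are illustrative, not Eigen's):

#include <array>
#include <cstddef>

// Equivalent of srcCoeff() for a column-major layout: output_strides describe
// the result tensor, input_strides say where each output dimension reads from
// in the source tensor. Requires NumDims >= 1.
template <std::size_t NumDims>
std::ptrdiff_t src_coeff(std::ptrdiff_t index,
                         const std::array<std::ptrdiff_t, NumDims>& output_strides,
                         const std::array<std::ptrdiff_t, NumDims>& input_strides) {
  std::ptrdiff_t input_index = 0;
  for (std::size_t i = NumDims - 1; i > 0; --i) {
    const std::ptrdiff_t idx = index / output_strides[i];  // coordinate along dimension i
    input_index += idx * input_strides[i];
    index -= idx * output_strides[i];                      // remainder for dimensions < i
  }
  return input_index + index * input_strides[0];           // innermost dimension
}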
- -// General include header of SYCL target for Tensor Module -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H - -#ifdef EIGEN_USE_SYCL - -// global pointer to set different attribute state for a class -template <class T> -struct MakeGlobalPointer { - typedef typename cl::sycl::global_ptr<T>::pointer_t Type; -}; - -// global pointer to set different attribute state for a class -template <class T> -struct MakeLocalPointer { - typedef typename cl::sycl::local_ptr<T>::pointer_t Type; -}; - - -namespace Eigen { -namespace TensorSycl { -namespace internal { - -/// This struct is used for special expression nodes with no operations (for example assign and selectOP). - struct NoOP; - -template<bool IsConst, typename T> struct GetType{ - typedef const T Type; -}; -template<typename T> struct GetType<false, T>{ - typedef T Type; -}; - -} -} -} - -// tuple construction -#include "TensorSyclTuple.h" - -// counting number of leaf at compile time -#include "TensorSyclLeafCount.h" - -// The index PlaceHolder takes the actual expression and replaces the actual -// data on it with the place holder. It uses the same pre-order expression tree -// traverse as the leaf count in order to give the right access number to each -// node in the expression -#include "TensorSyclPlaceHolderExpr.h" - -// creation of an accessor tuple from a tuple of SYCL buffers -#include "TensorSyclExtractAccessor.h" - -// this is used to change the address space type in tensor map for GPU -#include "TensorSyclConvertToDeviceExpression.h" - -// this is used to extract the functors -#include "TensorSyclExtractFunctors.h" - -// this is used to create tensormap on the device -// this is used to construct the expression on the device -#include "TensorSyclExprConstructor.h" - -/// this is used for extracting tensor reduction -#include "TensorReductionSycl.h" - -// kernel execution using fusion -#include "TensorSyclRun.h" - -#endif // end of EIGEN_USE_SYCL -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h deleted file mode 100644 index 8729c86..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ /dev/null @@ -1,121 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclConvertToDeviceExpression.h - * - * \brief: - * Conversion from host pointer to device pointer - * inside leaf nodes of the expression. 
- * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { - -/// \struct ConvertToDeviceExpression -/// \brief This struct is used to convert the MakePointer in the host expression -/// to the MakeGlobalPointer for the device expression. For the leafNodes -/// containing the pointer. This is due to the fact that the address space of -/// the pointer T* is different on the host and the device. -template <typename Expr> -struct ConvertToDeviceExpression; - -template<template<class...> class NonOpCategory, bool IsConst, typename... Args> -struct NonOpConversion{ - typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type; -}; - - -template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args> -struct DeviceConvertor{ - typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorMap -#define TENSORMAPCONVERT(CVQual)\ -template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\ -struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\ - typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\ -}; - -TENSORMAPCONVERT(const) -TENSORMAPCONVERT() -#undef TENSORMAPCONVERT - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp -#define CATEGORYCONVERT(CVQual)\ -template <template<class, class...> class Category, typename OP, typename... subExprs>\ -struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\ - typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... 
> Type;\ -}; -CATEGORYCONVERT(const) -CATEGORYCONVERT() -#undef CATEGORYCONVERT - - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseSelectOp -#define SELECTOPCONVERT(CVQual, Res)\ -template <typename IfExpr, typename ThenExpr, typename ElseExpr>\ -struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\ -: NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {}; -SELECTOPCONVERT(const, true) -SELECTOPCONVERT(, false) -#undef SELECTOPCONVERT - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const AssingOP -#define ASSIGNCONVERT(CVQual, Res)\ -template <typename LHSExpr, typename RHSExpr>\ -struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\ -: NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{}; - -ASSIGNCONVERT(const, true) -ASSIGNCONVERT(, false) -#undef ASSIGNCONVERT - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is either TensorForcedEvalOp or TensorEvalToOp -#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\ -template <typename Expr>\ -struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \ -: DeviceConvertor<ExprNode, Res, Expr>{}; - -KERNELBROKERCONVERT(const, true, TensorForcedEvalOp) -KERNELBROKERCONVERT(, false, TensorForcedEvalOp) -KERNELBROKERCONVERT(const, true, TensorEvalToOp) -KERNELBROKERCONVERT(, false, TensorEvalToOp) -#undef KERNELBROKERCONVERT - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorReductionOp -#define KERNELBROKERCONVERTREDUCTION(CVQual)\ -template <typename OP, typename Dim, typename subExpr, template <class> class MakePointer_>\ -struct ConvertToDeviceExpression<CVQual TensorReductionOp<OP, Dim, subExpr, MakePointer_> > {\ - typedef CVQual TensorReductionOp<OP, Dim, typename ConvertToDeviceExpression<subExpr>::Type, MakeGlobalPointer> Type;\ -}; - -KERNELBROKERCONVERTREDUCTION(const) -KERNELBROKERCONVERTREDUCTION() -#undef KERNELBROKERCONVERTREDUCTION - -} // namespace internal -} // namespace TensorSycl -} // namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX1 diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h deleted file mode 100644 index 983f631..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ /dev/null @@ -1,239 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclExprConstructor.h - * - * \brief: - * This file re-create an expression on the SYCL device in order - * to use the original tensor evaluator. 
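Stripped of the Eigen specifics, the ConvertToDeviceExpression pass above is a recursive type rewrite: wherever a leaf carries the host pointer-maker, substitute the device pointer-maker and leave the shape of the expression tree untouched. A much-reduced standalone analogue (Leaf, Plus, HostPtr and DevicePtr are invented for illustration and are not Eigen types):

#include <type_traits>

template <class T> struct HostPtr   { typedef T* type; };
template <class T> struct DevicePtr { typedef T* type; };  // different address space on a real device

template <class T, template <class> class MakePtr> struct Leaf {};
template <class L, class R> struct Plus {};

// Leaves swap their pointer-maker; interior nodes simply recurse.
template <class Expr> struct ToDevice;

template <class T, template <class> class MakePtr>
struct ToDevice<Leaf<T, MakePtr> > { typedef Leaf<T, DevicePtr> Type; };

template <class L, class R>
struct ToDevice<Plus<L, R> > {
  typedef Plus<typename ToDevice<L>::Type, typename ToDevice<R>::Type> Type;
};

static_assert(std::is_same<
                  ToDevice<Plus<Leaf<float, HostPtr>, Leaf<float, HostPtr> > >::Type,
                  Plus<Leaf<float, DevicePtr>, Leaf<float, DevicePtr> > >::value,
              "pointer makers are rewritten, expression structure is preserved");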
- * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { -/// this class is used by EvalToOp in order to create an lhs expression which is -/// a pointer from an accessor on device-only buffer -template <typename PtrType, size_t N, typename... Params> -struct EvalToLHSConstructor { - PtrType expr; - EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {} -}; - -/// struct ExprConstructor is used to reconstruct the expression on the device and -/// recreate the expression with MakeGlobalPointer containing the device address -/// space for the TensorMap pointers used in eval function. -/// It receives the original expression type, the functor of the node, the tuple -/// of accessors, and the device expression type to re-instantiate the -/// expression tree for the device -template <typename OrigExpr, typename IndexExpr, typename... Params> -struct ExprConstructor; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorMap -#define TENSORMAP(CVQual)\ -template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\ -template <class> class MakePointer_, size_t N, typename... Params>\ -struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\ -CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\ - typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ - : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ -}; - -TENSORMAP(const) -TENSORMAP() -#undef TENSORMAP - -#define UNARYCATEGORY(CVQual)\ -template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\ -struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\ - typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\ - my_type rhsExpr;\ - typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\ -}; - -UNARYCATEGORY(const) -UNARYCATEGORY() -#undef UNARYCATEGORY - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorBinaryOp -#define BINARYCATEGORY(CVQual)\ -template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\ -typename RHSExpr, typename... 
Params>\ -struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>, CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\ - typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ - typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ - typedef CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\ - my_left_type lhsExpr;\ - my_right_type rhsExpr;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ - : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\ -}; - -BINARYCATEGORY(const) -BINARYCATEGORY() -#undef BINARYCATEGORY - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorCwiseTernaryOp -#define TERNARYCATEGORY(CVQual)\ -template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\ -typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\ -struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\ - typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\ - typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\ - typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\ - typedef CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\ - my_arg1_type arg1Expr;\ - my_arg2_type arg2Expr;\ - my_arg3_type arg3Expr;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\ - : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\ -}; - -TERNARYCATEGORY(const) -TERNARYCATEGORY() -#undef TERNARYCATEGORY - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorCwiseSelectOp -#define SELECTOP(CVQual)\ -template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\ -struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\ - typedef ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\ - typedef ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\ - typedef ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\ - typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\ - my_if_type ifExpr;\ - my_then_type thenExpr;\ - my_else_type elseExpr;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ - : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\ -}; - -SELECTOP(const) -SELECTOP() -#undef SELECTOP - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorAssignOp -#define ASSIGN(CVQual)\ -template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... 
Params>\ -struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>, CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\ - typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\ - typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\ - typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type> Type;\ - my_left_type lhsExpr;\ - my_right_type rhsExpr;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ - : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\ - }; - - ASSIGN(const) - ASSIGN() - #undef ASSIGN -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorEvalToOp -#define EVALTO(CVQual)\ -template <typename OrigExpr, typename Expr, typename... Params>\ -struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\ - typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\ - typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\ - typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\ - my_expr_type nestedExpression;\ - EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\ - : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\ -}; - -EVALTO(const) -EVALTO() -#undef EVALTO - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorForcedEvalOp -#define FORCEDEVAL(CVQual)\ -template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\ -struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\ -CVQual PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\ - typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\ - TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ - : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ -}; - -FORCEDEVAL(const) -FORCEDEVAL() -#undef FORCEDEVAL - -template <bool Conds, size_t X , size_t Y > struct ValueCondition { - static const size_t Res =X; -}; -template<size_t X, size_t Y> struct ValueCondition<false, X , Y> { - static const size_t Res =Y; -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is TensorReductionOp -#define SYCLREDUCTIONEXPR(CVQual)\ -template <typename OP, typename Dim, typename OrigExpr, typename DevExpr, size_t N, typename... 
Params>\ -struct ExprConstructor<CVQual TensorReductionOp<OP, Dim, OrigExpr, MakeGlobalPointer>,\ -CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dim, DevExpr>, N>, Params...> {\ - static const size_t NumIndices= ValueCondition< TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions==0, 1, TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions >::Res;\ - typedef CVQual TensorMap<Tensor<typename TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::Scalar,\ - NumIndices, 0, typename TensorReductionOp<OP, Dim, DevExpr>::Index>, 0, MakeGlobalPointer> Type;\ - Type expr;\ - template <typename FuncDetector>\ - ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\ - : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\ -}; - -SYCLREDUCTIONEXPR(const) -SYCLREDUCTIONEXPR() -#undef SYCLREDUCTIONEXPR - -/// template deduction for \ref ExprConstructor struct -template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params> -auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t) - -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) { - return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t); -} - -} /// namespace TensorSycl -} /// namespace internal -} /// namespace Eigen - - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h deleted file mode 100644 index cc18fcd..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h +++ /dev/null @@ -1,204 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclExtractAccessor.h - * - * \brief: - * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl - * buffers as an input. Using pre-order tree traversal, ExtractAccessor - * recursively calls itself for its children in the expression tree. The - * leaf node in the PlaceHolder expression is nothing but a container preserving - * the order of the actual data in the tuple of sycl buffer. By invoking the - * extract accessor for the PlaceHolder<N>, an accessor is created for the Nth - * buffer in the tuple of buffers. This accessor is then added as an Nth - * element in the tuple of accessors. In this case we preserve the order of data - * in the expression tree. - * - * This is the specialisation of extract accessor method for different operation - * type in the PlaceHolder expression. - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { -/// struct ExtractAccessor: Extract Accessor Class is used to extract the -/// accessor from a buffer. 
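The accessor extraction described above amounts to a pre-order walk that appends one entry per leaf to a flat tuple, so the N-th leaf of the placeholder expression always lines up with the N-th accessor. A loose host-side analogue using std::tuple, with toy node types invented for the example (requires C++14 return-type deduction):

#include <tuple>

// Toy expression nodes: a leaf holds a buffer pointer, an interior node holds children.
template <class T> struct LeafNode { T* buffer; };
template <class L, class R> struct PlusNode { L lhs; R rhs; };

// Pre-order collection of leaf buffers; the tuple position of each pointer
// matches the position of its leaf in the tree, mirroring the accessor tuple.
template <class T>
auto collect_leaves(const LeafNode<T>& leaf) {
  return std::make_tuple(leaf.buffer);
}

template <class L, class R>
auto collect_leaves(const PlusNode<L, R>& node) {
  return std::tuple_cat(collect_leaves(node.lhs), collect_leaves(node.rhs));
}

// Usage sketch:
//   float a[4], b[4];
//   PlusNode<LeafNode<float>, LeafNode<float> > expr{ {a}, {b} };
//   auto buffers = collect_leaves(expr);   // std::tuple<float*, float*> holding {a, b}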
-/// Depending on the type of the leaf node we can get a read accessor or a -/// read_write accessor -template <typename Evaluator> -struct ExtractAccessor; - -struct AccessorConstructor{ - template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval) - -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) { - return ExtractAccessor<Arg>::getTuple(cgh, eval); - } - - template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2) - -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) { - return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2)); - } - template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3) - -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) { - return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3))); - } - template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval) - -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM, - typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){ - return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data())); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp -template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){ - return AccessorConstructor::getTuple(cgh, eval.impl()); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp -template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } -}; -/// specialisation of the \ref ExtractAccessor struct when the 
node type is TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseTernaryOp -template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){ - return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl()); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp -template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// const TensorCwiseSelectOp. This is a special case where there is no OP -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){ - return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl()); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is -/// TensorCwiseSelectOp. 
This is a special case where there is no OP -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval) - -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){ - return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl()); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap -#define TENSORMAPEXPR(CVQual, ACCType)\ -template <typename PlainObjectType, int Options_, typename Dev>\ -struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\ - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\ - -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\ - return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\ - }\ -}; -TENSORMAPEXPR(const, cl::sycl::access::mode::read) -TENSORMAPEXPR(, cl::sycl::access::mode::read_write) -#undef TENSORMAPEXPR - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ - return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp -template <typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval) - -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){ - return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl())); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp -template <typename Expr, typename Dev> 
-struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp -template <typename OP, typename Dim, typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> > { - static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> eval) - -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){ - return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval); - } -}; - -/// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp -template <typename OP, typename Dim, typename Expr, typename Dev> -struct ExtractAccessor<TensorEvaluator<TensorReductionOp<OP, Dim, Expr>, Dev> > -: ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> >{}; - -/// template deduction for \ref ExtractAccessor -template <typename Evaluator> -auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) --> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) { - return ExtractAccessor<Evaluator>::getTuple(cgh, expr); -} - -} /// namespace TensorSycl -} /// namespace internal -} /// namespace Eigen -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h deleted file mode 100644 index 9edd38e..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h +++ /dev/null @@ -1,177 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclextractFunctors.h - * - * \brief: - * Used to extract all the functors allocated to each node of the expression -*tree. - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { -/// struct FunctorExtractor: This struct is used to extract the functors -/// constructed on -/// the host-side, to pack them and reuse them in reconstruction of the -/// expression on the device. -/// We have to do that as in Eigen the functors are not stateless so we cannot -/// re-instantiate them on the device. -/// We have to pass instantiated functors to the device. -// This struct is used for leafNode (TensorMap) and nodes behaving like leafNode (TensorForcedEval). 
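[Editor's illustration, not part of the deleted Eigen source.] Before the primary FunctorExtractor template that follows, a compact, self-contained C++ sketch may make the comment above more concrete. The names (ScaleFunctor, PackedFunctor, pack) are hypothetical and belong neither to Eigen nor to SYCL; the point is only that a stateful functor must be copied by value into something trivially copyable so a device kernel can reuse it instead of re-instantiating it.

#include <cstdio>

// Hypothetical stateful host-side functor: the scale factor is state that only
// the host knows, so the device cannot recreate the functor from scratch.
struct ScaleFunctor {
  float factor;
  float operator()(float x) const { return factor * x; }
};

// Plays the role sketched for FunctorExtractor: a plain struct that stores the
// functor by value so it can be captured (copied) into a kernel lambda.
template <typename Functor>
struct PackedFunctor {
  Functor func;
};

template <typename Functor>
PackedFunctor<Functor> pack(const Functor& f) { return PackedFunctor<Functor>{f}; }

int main() {
  ScaleFunctor host{2.5f};
  PackedFunctor<ScaleFunctor> packed = pack(host);  // conceptually what extractFunctors() does
  std::printf("%f\n", packed.func(4.0f));           // prints 10.000000
  return 0;
}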
-template <typename Evaluator> struct FunctorExtractor{ - typedef typename Evaluator::Dimensions Dimensions; - const Dimensions m_dimensions; - const Dimensions& dimensions() const { return m_dimensions; } - FunctorExtractor(const Evaluator& expr) - : m_dimensions(expr.dimensions()) {} - -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp -template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - OP func; - FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr) - : rhsExpr(expr.impl()), func(expr.functor()) {} -}; -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp -template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseBinaryOp -template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - OP func; - FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr) - : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {} -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseBinaryOp -template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseTernaryOp -template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev> -struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > { - FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr; - FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr; - FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr; - OP func; - FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr) - : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {} -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseTernaryOp -template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev> -struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > -:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, 
Arg2Expr, Arg3Expr>, Dev> >{}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated. -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr; - FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr; - FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr; - FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr) - : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {} -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated -template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > -:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorAssignOp. This is an specialisation without OP so it has to be separated. -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr; - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr) - : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorAssignOp. This is an specialisation without OP so it has to be separated. -template <typename LHSExpr, typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> > -:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{}; - - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated. -template <typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > { - FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr; - FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr) - : rhsExpr(expr.impl()) {} -}; - -/// specialisation of the \ref FunctorExtractor struct when the node type is -/// TensorEvalToOp. This is a specialisation without OP so it has to be separated. 
-template <typename RHSExpr, typename Dev> -struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> > -: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {}; - -template<typename Dim, size_t NumOutputDim> struct DimConstr { -template<typename InDim> - static inline Dim getDim(InDim dims ) {return dims;} -}; - -template<typename Dim> struct DimConstr<Dim, 0> { - template<typename InDim> - static inline Dim getDim(InDim dims ) {return Dim(dims.TotalSize());} -}; - -template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> -struct FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{ - typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator; - typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions; - const Dimensions m_dimensions; - const Dimensions& dimensions() const { return m_dimensions; } - FunctorExtractor(const TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr) - : m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {} -}; - - -template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> -struct FunctorExtractor<TensorEvaluator<TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>> -: FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{}; -/// template deduction function for FunctorExtractor -template <typename Evaluator> -auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> { - return FunctorExtractor<Evaluator>(evaluator); -} -} // namespace internal -} // namespace TensorSycl -} // namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h deleted file mode 100644 index 25d1fac..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h +++ /dev/null @@ -1,114 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclLeafCount.h - * - * \brief: - * The leaf count used the pre-order expression tree traverse in order to name - * count the number of leaf nodes in the expression - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { -/// \brief LeafCount used to counting terminal nodes. The total number of -/// leaf nodes is used by MakePlaceHolderExprHelper to find the order -/// of the leaf node in a expression tree at compile time. -template <typename Expr> -struct LeafCount; - -template<typename... 
Args> struct CategoryCount; - -template<> struct CategoryCount<> -{ - static const size_t Count =0; -}; - -template<typename Arg, typename... Args> -struct CategoryCount<Arg,Args...>{ - static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count; -}; - -/// specialisation of the \ref LeafCount struct when the node type is const TensorMap -template <typename PlainObjectType, int Options_, template <class> class MakePointer_> -struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > { - static const size_t Count =1; -}; - -/// specialisation of the \ref LeafCount struct when the node type is TensorMap -template <typename PlainObjectType, int Options_, template <class> class MakePointer_> -struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{}; - -// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and Const TensorBroadcastingOp -template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> -struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {}; -// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp -template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr> -struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{}; - -/// specialisation of the \ref LeafCount struct when the node type is const TensorSelectOp is an exception -template <typename IfExpr, typename ThenExpr, typename ElseExpr> -struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {}; -/// specialisation of the \ref LeafCount struct when the node type is TensorSelectOp -template <typename IfExpr, typename ThenExpr, typename ElseExpr> -struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {}; - - -/// specialisation of the \ref LeafCount struct when the node type is const TensorAssignOp -template <typename LHSExpr, typename RHSExpr> -struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {}; - -/// specialisation of the \ref LeafCount struct when the node type is -/// TensorAssignOp is an exception. 
It is not the same as Unary -template <typename LHSExpr, typename RHSExpr> -struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{}; - -/// specialisation of the \ref LeafCount struct when the node type is const TensorForcedEvalOp -template <typename Expr> -struct LeafCount<const TensorForcedEvalOp<Expr> > { - static const size_t Count =1; -}; - -/// specialisation of the \ref LeafCount struct when the node type is TensorForcedEvalOp -template <typename Expr> -struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {}; - -/// specialisation of the \ref LeafCount struct when the node type is const TensorEvalToOp -template <typename Expr> -struct LeafCount<const TensorEvalToOp<Expr> > { - static const size_t Count = 1 + CategoryCount<Expr>::Count; -}; - -/// specialisation of the \ref LeafCount struct when the node type is const TensorReductionOp -template <typename OP, typename Dim, typename Expr> -struct LeafCount<const TensorReductionOp<OP, Dim, Expr> > { - static const size_t Count =1; -}; - -/// specialisation of the \ref LeafCount struct when the node type is TensorReductionOp -template <typename OP, typename Dim, typename Expr> -struct LeafCount<TensorReductionOp<OP, Dim, Expr> >: LeafCount<const TensorReductionOp<OP, Dim, Expr> >{}; - -/// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp -template <typename Expr> -struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{}; - -} /// namespace TensorSycl -} /// namespace internal -} /// namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h deleted file mode 100644 index d4c250c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h +++ /dev/null @@ -1,181 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclPlaceHolderExpr.h - * - * \brief: - * This is the specialisation of the placeholder expression based on the - * operation type - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP - -namespace Eigen { -namespace TensorSycl { -namespace internal { - -/// \struct PlaceHolder -/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression -/// tree. -/// PlaceHolder contains the order of the leaf node in the expression tree. -template <typename Scalar, size_t N> -struct PlaceHolder { - static constexpr size_t I = N; - typedef Scalar Type; -}; - -/// \sttruct PlaceHolderExpression -/// \brief it is used to create the PlaceHolder expression. The PlaceHolder -/// expression is a copy of expression type in which the TensorMap of the has -/// been replaced with PlaceHolder. 
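[Editor's illustration, not part of the deleted Eigen source.] As a worked example of the leaf counting and placeholder numbering described above: an expression of the shape a + b * c has three terminal nodes, so createPlaceHolderExpression would assign each leaf an index in [0, 2] while replacing every TensorMap with a PlaceHolder. The following minimal compile-time sketch mirrors the same counting recursion; the type names are ours, not the Eigen templates.

#include <cstddef>

struct Leaf {};                                    // stands in for a TensorMap leaf
template <typename L, typename R> struct Sum {};   // hypothetical binary nodes
template <typename L, typename R> struct Prod {};

template <typename T> struct DemoLeafCount;        // mirrors LeafCount

template <> struct DemoLeafCount<Leaf> {
  static const std::size_t Count = 1;              // a leaf contributes exactly one
};

template <typename L, typename R> struct DemoLeafCount<Sum<L, R> > {
  static const std::size_t Count = DemoLeafCount<L>::Count + DemoLeafCount<R>::Count;
};

template <typename L, typename R> struct DemoLeafCount<Prod<L, R> > {
  static const std::size_t Count = DemoLeafCount<L>::Count + DemoLeafCount<R>::Count;
};

// a + b * c: three leaves, hence three placeholder indices to distribute.
static_assert(DemoLeafCount<Sum<Leaf, Prod<Leaf, Leaf> > >::Count == 3,
              "three terminal nodes expected");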
-template <typename Expr, size_t N> -struct PlaceHolderExpression; - -template<size_t N, typename... Args> -struct CalculateIndex; - -template<size_t N, typename Arg> -struct CalculateIndex<N, Arg>{ - typedef typename PlaceHolderExpression<Arg, N>::Type ArgType; - typedef utility::tuple::Tuple<ArgType> ArgsTuple; -}; - -template<size_t N, typename Arg1, typename Arg2> -struct CalculateIndex<N, Arg1, Arg2>{ - static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; - typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type; - typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type; - typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple; -}; - -template<size_t N, typename Arg1, typename Arg2, typename Arg3> -struct CalculateIndex<N, Arg1, Arg2, Arg3> { - static const size_t Arg3LeafCount = LeafCount<Arg3>::Count; - static const size_t Arg2LeafCount = LeafCount<Arg2>::Count; - typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type; - typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type; - typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type; - typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple; -}; - -template<template<class...> class Category , class OP, class TPL> -struct CategoryHelper; - -template<template<class...> class Category , class OP, class ...T > -struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > { - typedef Category<OP, T... > Type; -}; - -template<template<class...> class Category , class ...T > -struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > { - typedef Category<T... > Type; -}; - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp -#define OPEXPRCATEGORY(CVQual)\ -template <template <class, class... > class Category, typename OP, typename... 
SubExpr, size_t N>\ -struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\ - typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\ -}; - -OPEXPRCATEGORY(const) -OPEXPRCATEGORY() -#undef OPEXPRCATEGORY - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorCwiseSelectOp -#define SELECTEXPR(CVQual)\ -template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\ -struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\ - typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\ -}; - -SELECTEXPR(const) -SELECTEXPR() -#undef SELECTEXPR - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorAssignOp -#define ASSIGNEXPR(CVQual)\ -template <typename LHSExpr, typename RHSExpr, size_t N>\ -struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\ - typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\ -}; - -ASSIGNEXPR(const) -ASSIGNEXPR() -#undef ASSIGNEXPR - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorMap -#define TENSORMAPEXPR(CVQual)\ -template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\ -struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\ - typedef CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\ -}; - -TENSORMAPEXPR(const) -TENSORMAPEXPR() -#undef TENSORMAPEXPR - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorForcedEvalOp -#define FORCEDEVAL(CVQual)\ -template <typename Expr, size_t N>\ -struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\ - typedef CVQual PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\ -}; - -FORCEDEVAL(const) -FORCEDEVAL() -#undef FORCEDEVAL - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorEvalToOp -#define EVALTO(CVQual)\ -template <typename Expr, size_t N>\ -struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\ - typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\ -}; - -EVALTO(const) -EVALTO() -#undef EVALTO - - -/// specialisation of the \ref PlaceHolderExpression when the node is -/// TensorReductionOp -#define SYCLREDUCTION(CVQual)\ -template <typename OP, typename Dims, typename Expr, size_t N>\ -struct PlaceHolderExpression<CVQual TensorReductionOp<OP, Dims, Expr>, N>{\ - typedef CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dims,Expr>, N> Type;\ -}; -SYCLREDUCTION(const) -SYCLREDUCTION() -#undef SYCLREDUCTION - -/// template deduction for \ref PlaceHolderExpression struct -template <typename Expr> -struct createPlaceHolderExpression { - static const size_t TotalLeaves = LeafCount<Expr>::Count; - typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type; -}; - -} // internal -} // TensorSycl -} // namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h deleted file mode 100644 index 7914b6f..0000000 --- 
a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h +++ /dev/null @@ -1,70 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Cummins Chris PhD student at The University of Edinburgh. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensorSyclRun.h - * - * \brief: - * Schedule_kernel invoke an specialised version of kernel struct. The - * specialisation is based on the data dimension in sycl buffer - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP - -namespace Eigen { -namespace TensorSycl { -/// The run function in tensor sycl convert the expression tree to a buffer -/// based expression tree; -/// creates the expression tree for the device with accessor to buffers; -/// construct the kernel and submit it to the sycl queue. -template <typename Expr, typename Dev> -void run(Expr &expr, Dev &dev) { - Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); - if (needs_assign) { - typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr; - auto functors = internal::extractFunctors(evaluator); - - size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; - dev.m_queue.submit([&](cl::sycl::handler &cgh) { - - // create a tuple of accessors from Evaluator - auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator); - const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; - size_t GRange=range; - if (tileSize>GRange) tileSize=GRange; - else if(GRange>tileSize){ - size_t xMode = GRange % tileSize; - if (xMode != 0) GRange += (tileSize - xMode); - } - // run the kernel - cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { - typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr; - auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); - auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice()); - if (itemID.get_global_linear_id() < range) { - device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id())); - } - }); - }); - dev.m_queue.throw_asynchronous(); - } - - evaluator.cleanup(); -} -} // namespace TensorSycl -} // namespace Eigen - -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h deleted file mode 100644 index 83915f3..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h +++ /dev/null @@ -1,237 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Mehdi Goli Codeplay Software Ltd. 
-// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/***************************************************************** - * TensroSyclTuple.h - * - * \brief: - * Minimal implementation of std::tuple that can be used inside a SYCL kernel. - * -*****************************************************************/ - -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP -namespace utility { -namespace tuple { -/// \struct StaticIf -/// \brief The StaticIf struct is used to statically choose the type based on the -/// condition. -template <bool, typename T = void> struct StaticIf; -/// \brief specialisation of the \ref StaticIf when the condition is true -template <typename T> -struct StaticIf<true, T> { - typedef T type; -}; - -/// \struct Tuple -/// \brief is a fixed-size collection of heterogeneous values -/// \tparam Ts... - the types of the elements that the tuple stores. -/// Empty list is supported. -template <class... Ts> -struct Tuple {}; - -/// \brief specialisation of the \ref Tuple class when the tuple has at least -/// one element. -/// \tparam T : the type of the first element in the tuple. -/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. -template <class T, class... Ts> -struct Tuple<T, Ts...> { - Tuple(T t, Ts... ts) : head(t), tail(ts...) {} - T head; - Tuple<Ts...> tail; -}; - -///\ struct ElemTypeHolder -/// \brief ElemTypeHolder class is used to specify the types of the -/// elements inside the tuple -/// \tparam size_t the number of elements inside the tuple -/// \tparam class the tuple class -template <size_t, class> -struct ElemTypeHolder; - -/// \brief specialisation of the \ref ElemTypeHolder class when the number of -/// elements inside the tuple is 1 -template <class T, class... Ts> -struct ElemTypeHolder<0, Tuple<T, Ts...> > { - typedef T type; -}; - -/// \brief specialisation of the \ref ElemTypeHolder class when the number of -/// elements inside the tuple is bigger than 1. It recursively calls itself to -/// detect the type of each element in the tuple -/// \tparam T : the type of the first element in the tuple. -/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. -/// \tparam K is the Kth element in the tuple -template <size_t k, class T, class... Ts> -struct ElemTypeHolder<k, Tuple<T, Ts...> > { - typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type; -}; - -/// get -/// \brief Extracts the first element from the tuple. -/// K=0 represents the first element of the tuple. The tuple cannot be empty. -/// \tparam Ts... are the type of the elements in the tuple. -/// \param t is the tuple whose contents to extract -/// \return typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type - -#define TERMINATE_CONDS_TUPLE_GET(CVQual) \ -template <size_t k, class... Ts> \ -typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \ -get(CVQual Tuple<Ts...> &t) { \ - static_assert(sizeof...(Ts)!=0, "The requseted value is bigger than the size of the tuple"); \ - return t.head; \ -} - -TERMINATE_CONDS_TUPLE_GET(const) -TERMINATE_CONDS_TUPLE_GET() -#undef TERMINATE_CONDS_TUPLE_GET -/// get -/// \brief Extracts the Kth element from the tuple. 
-///\tparam K is an integer value in [0,sizeof...(Types)). -/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple -/// \tparam Ts... are the type of the elements in the tuple. -/// \param t is the tuple whose contents to extract -/// \return typename ElemTypeHolder<K, Tuple<Ts...> >::type &>::type -#define RECURSIVE_TUPLE_GET(CVQual) \ -template <size_t k, class T, class... Ts> \ -typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \ -get(CVQual Tuple<T, Ts...> &t) { \ - return utility::tuple::get<k - 1>(t.tail); \ -} -RECURSIVE_TUPLE_GET(const) -RECURSIVE_TUPLE_GET() -#undef RECURSIVE_TUPLE_GET - -/// make_tuple -/// \brief Creates a tuple object, deducing the target type from the types of -/// arguments. -/// \tparam Args the type of the arguments to construct the tuple from -/// \param args zero or more arguments to construct the tuple from -/// \return Tuple<Args...> -template <typename... Args> -Tuple<Args...> make_tuple(Args... args) { - return Tuple<Args...>(args...); -} - -/// size -/// \brief Provides access to the number of elements in a tuple as a -/// compile-time constant expression. -/// \tparam Args the type of the arguments to construct the tuple from -/// \return size_t -template <typename... Args> -static constexpr size_t size(Tuple<Args...> &) { - return sizeof...(Args); -} - -/// \struct IndexList -/// \brief Creates a list of index from the elements in the tuple -/// \tparam Is... a list of index from [0 to sizeof...(tuple elements)) -template <size_t... Is> -struct IndexList {}; - -/// \struct RangeBuilder -/// \brief Collects internal details for generating index ranges [MIN, MAX) -/// Declare primary template for index range builder -/// \tparam MIN is the starting index in the tuple -/// \tparam N represents sizeof..(elemens)- sizeof...(Is) -/// \tparam Is... are the list of generated index so far -template <size_t MIN, size_t N, size_t... Is> -struct RangeBuilder; - -// FIXME Doxygen has problems with recursive inheritance -#ifndef EIGEN_PARSED_BY_DOXYGEN -/// \brief base Step: Specialisation of the \ref RangeBuilder when the -/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements)) -/// \tparam MIN is the starting index of the tuple -/// \tparam Is is [0 to sizeof...(tuple elements)) -template <size_t MIN, size_t... Is> -struct RangeBuilder<MIN, MIN, Is...> { - typedef IndexList<Is...> type; -}; - -/// Induction step: Specialisation of the RangeBuilder class when N!=MIN -/// in this case we are recursively subtracting N by one and adding one -/// index to Is... list until MIN==N -/// \tparam MIN is the starting index in the tuple -/// \tparam N represents sizeof..(elemens)- sizeof...(Is) -/// \tparam Is... are the list of generated index so far -template <size_t MIN, size_t N, size_t... Is> -struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {}; -#endif // EIGEN_PARSED_BY_DOXYGEN - -/// \brief IndexRange that returns a [MIN, MAX) index range -/// \tparam MIN is the starting index in the tuple -/// \tparam MAX is the size of the tuple -template <size_t MIN, size_t MAX> -struct IndexRange: RangeBuilder<MIN, MAX>::type {}; - -/// append_base -/// \brief unpacking the elements of the input tuple t and creating a new tuple -/// by adding element a at the end of it. -///\tparam Args... the type of the elements inside the tuple t -/// \tparam T the type of the new element going to be added at the end of tuple -/// \tparam I... 
is the list of index from [0 to sizeof...(t)) -/// \param t the tuple on which we want to append a. -/// \param a the new elements going to be added to the tuple -/// \return Tuple<Args..., T> -template <typename... Args, typename T, size_t... I> -Tuple<Args..., T> append_base(Tuple<Args...> t, T a,IndexList<I...>) { - return utility::tuple::make_tuple(get<I>(t)..., a); -} - -/// append -/// \brief the deduction function for \ref append_base that automatically -/// generate the \ref IndexRange -///\tparam Args... the type of the elements inside the tuple t -/// \tparam T the type of the new element going to be added at the end of tuple -/// \param t the tuple on which we want to append a. -/// \param a the new elements going to be added to the tuple -/// \return Tuple<Args..., T> -template <typename... Args, typename T> -Tuple<Args..., T> append(Tuple<Args...> t, T a) { - return utility::tuple::append_base(t, a, IndexRange<0, sizeof...(Args)>()); -} - -/// append_base -/// \brief This is a specialisation of \ref append_base when we want to -/// concatenate -/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate the -/// IndexRange for each of them and create an output tuple T that contains both -/// elements of t1 and t2. -///\tparam Args1... the type of the elements inside the tuple t1 -///\tparam Args2... the type of the elements inside the tuple t2 -/// \tparam I1... is the list of index from [0 to sizeof...(t1)) -/// \tparam I2... is the list of index from [0 to sizeof...(t2)) -/// \param t1 is the tuple on which we want to append t2. -/// \param t2 is the tuple that is going to be added on t1. -/// \return Tuple<Args1..., Args2...> -template <typename... Args1, typename... Args2, size_t... I1, size_t... I2> -Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) { - return utility::tuple::make_tuple(get<I1>(t1)...,get<I2>(t2)...); -} - -/// append -/// \brief deduction function for \ref append_base when we are appending tuple -/// t1 by tuple t2. In this case the \ref IndexRange for both tuple are -/// automatically generated. -///\tparam Args1... the type of the elements inside the tuple t1 -///\tparam Args2... the type of the elements inside the tuple t2 -/// \param t1 is the tuple on which we want to append t2. -/// \param t2 is the tuple that is going to be added on t1. -/// \return Tuple<Args1..., Args2...> -template <typename... Args1, typename... Args2> -Tuple<Args1..., Args2...> append(Tuple<Args1...> t1,Tuple<Args2...> t2) { - return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>()); -} -} // tuple -} // utility -#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h deleted file mode 100644 index ffcf8b0..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ /dev/null @@ -1,272 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
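[Editor's illustration, not part of the deleted Eigen source.] Referring back to the utility::tuple implementation in TensorSyclTuple.h above, its interface (make_tuple, get<k>, size, append) can be exercised as follows. This is only a usage sketch and assumes those definitions are visible to the translation unit; it is not code from the library itself.

#include <cstdio>
// ... the utility::tuple definitions from TensorSyclTuple.h are assumed to be in scope ...

int main() {
  auto t  = utility::tuple::make_tuple(1, 2.5f, 'x');  // Tuple<int, float, char>
  auto t2 = utility::tuple::append(t, 7);              // Tuple<int, float, char, int>

  // get<k> walks the head/tail chain at compile time and returns a reference.
  std::printf("%d %f %c %d\n",
              utility::tuple::get<0>(t2), utility::tuple::get<1>(t2),
              utility::tuple::get<2>(t2), utility::tuple::get<3>(t2));

  // size() reports the number of element types as a constant expression.
  std::printf("size = %zu\n", utility::tuple::size(t2)); // prints size = 4
  return 0;
}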
- -#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H -#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H - -namespace Eigen { -namespace internal { - - -template<typename Scalar, int Options> -class compute_tensor_flags -{ - enum { - is_dynamic_size_storage = 1, - - is_aligned = - ( - ((Options&DontAlign)==0) && ( -#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 - (!is_dynamic_size_storage) -#else - 0 -#endif - | -#if EIGEN_MAX_ALIGN_BYTES>0 - is_dynamic_size_storage -#else - 0 -#endif - ) - ), - packet_access_bit = packet_traits<Scalar>::Vectorizable && is_aligned ? PacketAccessBit : 0 - }; - - public: - enum { ret = packet_access_bit }; -}; - - -template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_> -struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - typedef Scalar_ Scalar; - typedef Dense StorageKind; - typedef IndexType_ Index; - static const int NumDimensions = NumIndices_; - static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; - enum { - Options = Options_, - Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit) - }; - template <typename T> struct MakePointer { - typedef T* Type; - }; -}; - - -template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_> -struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> > -{ - typedef Scalar_ Scalar; - typedef Dense StorageKind; - typedef IndexType_ Index; - static const int NumDimensions = array_size<Dimensions>::value; - static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; - enum { - Options = Options_, - Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit) - }; - template <typename T> struct MakePointer { - typedef T* Type; - }; -}; - - -template<typename PlainObjectType, int Options_, template <class> class MakePointer_> -struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> > - : public traits<PlainObjectType> -{ - typedef traits<PlainObjectType> BaseTraits; - typedef typename BaseTraits::Scalar Scalar; - typedef typename BaseTraits::StorageKind StorageKind; - typedef typename BaseTraits::Index Index; - static const int NumDimensions = BaseTraits::NumDimensions; - static const int Layout = BaseTraits::Layout; - enum { - Options = Options_, - Flags = BaseTraits::Flags - }; - template <class T> struct MakePointer { - // Intermediate typedef to workaround MSVC issue. 
- typedef MakePointer_<T> MakePointerT; - typedef typename MakePointerT::Type Type; - }; -}; - -template<typename PlainObjectType> -struct traits<TensorRef<PlainObjectType> > - : public traits<PlainObjectType> -{ - typedef traits<PlainObjectType> BaseTraits; - typedef typename BaseTraits::Scalar Scalar; - typedef typename BaseTraits::StorageKind StorageKind; - typedef typename BaseTraits::Index Index; - static const int NumDimensions = BaseTraits::NumDimensions; - static const int Layout = BaseTraits::Layout; - enum { - Options = BaseTraits::Options, - Flags = BaseTraits::Flags - }; -}; - - -template<typename _Scalar, int NumIndices_, int Options, typename IndexType_> -struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> -{ - typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; -}; - -template<typename _Scalar, int NumIndices_, int Options, typename IndexType_> -struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense> -{ - typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type; -}; - -template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template<typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense> -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template<typename PlainObjectType, int Options, template <class> class MakePointer> -struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> -{ - typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; -}; - -template<typename PlainObjectType, int Options, template <class> class MakePointer> -struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense> -{ - typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; -}; - -template<typename PlainObjectType> -struct eval<TensorRef<PlainObjectType>, Eigen::Dense> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -template<typename PlainObjectType> -struct eval<const TensorRef<PlainObjectType>, Eigen::Dense> -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector. 
-template<typename T, int n=1, typename PlainObject = void> struct nested -{ - typedef typename ref_selector<T>::type type; -}; - -template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_> -struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; -}; - -template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_> -struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> > -{ - typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type; -}; - -template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> > -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - -template <typename Scalar_, typename Dimensions, int Options, typename IndexType_> -struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> > -{ - typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type; -}; - - -template <typename PlainObjectType, int Options, template <class> class MakePointer> -struct nested<TensorMap<PlainObjectType, Options, MakePointer> > -{ - typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; -}; - -template <typename PlainObjectType, int Options, template <class> class MakePointer> -struct nested<const TensorMap<PlainObjectType, Options, MakePointer> > -{ - typedef const TensorMap<PlainObjectType, Options, MakePointer>& type; -}; - -template <typename PlainObjectType> -struct nested<TensorRef<PlainObjectType> > -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -template <typename PlainObjectType> -struct nested<const TensorRef<PlainObjectType> > -{ - typedef const TensorRef<PlainObjectType>& type; -}; - -} // end namespace internal - -// Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C, -// R, B), and convolve it with a set of filters, which can also be presented as -// a tensor (D, K, K, M), where M is the number of filters, K is the filter -// size, and each 3-dimensional tensor of size (D, K, K) is a filter. For -// simplicity we assume that we always use square filters (which is usually the -// case in images), hence the two Ks in the tensor dimension. It also takes in -// a few additional parameters: -// Stride (S): The convolution stride is the offset between locations where we -// apply the filters. A larger stride means that the output will be -// spatially smaller. -// Padding (P): The padding we apply to the input tensor along the R and C -// dimensions. This is usually used to make sure that the spatial -// dimensions of the output matches our intention. -// -// Two types of padding are often used: -// SAME: The pad value is computed so that the output will have size -// R/S and C/S. -// VALID: no padding is carried out. -// When we do padding, the padded values at the padded locations are usually -// zero. -// -// The output dimensions for convolution, when given all the parameters above, -// are as follows: -// When Padding = SAME: the output size is (B, R', C', M), where -// R' = ceil(float(R) / float(S)) -// C' = ceil(float(C) / float(S)) -// where ceil is the ceiling function. The input tensor is padded with 0 as -// needed. 
The number of padded rows and columns are computed as: -// Pr = ((R' - 1) * S + K - R) / 2 -// Pc = ((C' - 1) * S + K - C) / 2 -// when the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2. -// This is where SAME comes from - the output has the same size as the input has. -// When Padding = VALID: the output size is computed as -// R' = ceil(float(R - K + 1) / float(S)) -// C' = ceil(float(C - K + 1) / float(S)) -// and the number of padded rows and columns are computed in the same way as in -// the SAME case. -// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0, -// Pc=0. -typedef enum { - PADDING_VALID = 1, - PADDING_SAME = 2 -} PaddingType; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h deleted file mode 100644 index 3523e7c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h +++ /dev/null @@ -1,248 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H -#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H - -namespace Eigen { -namespace internal { - - -template <uint64_t n> -struct static_val { - static const uint64_t value = n; - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { } - - template <typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) { - eigen_assert(v == n); - } -}; - - -template <typename HIGH = uint64_t, typename LOW = uint64_t> -struct TensorUInt128 -{ - HIGH high; - LOW low; - - template<typename OTHER_HIGH, typename OTHER_LOW> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) { - EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - template<typename OTHER_HIGH, typename OTHER_LOW> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) { - EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE); - high = other.high; - low = other.low; - return *this; - } - - template<typename T> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - explicit TensorUInt128(const T& x) : high(0), low(x) { - eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest())); - eigen_assert(x >= 0); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - TensorUInt128(HIGH y, LOW x) : high(y), low(x) { } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const { - return low; - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const { - return low; - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const { - return high; - } -}; - - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -bool operator 
== (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - return (lhs.high == rhs.high) & (lhs.low == rhs.low); -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - return (lhs.high != rhs.high) | (lhs.low != rhs.low); -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (lhs.high != rhs.high) { - return lhs.high > rhs.high; - } - return lhs.low >= rhs.low; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (lhs.high != rhs.high) { - return lhs.high < rhs.high; - } - return lhs.low < rhs.low; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low); - if (result.low < rhs.low) { - result.high += 1; - } - return result; -} - -template <typename HL, typename LL, typename HR, typename LR> -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low); - if (result.low > lhs.low) { - result.high -= 1; - } - return result; -} - - -template <typename HL, typename LL, typename HR, typename LR> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - // Split each 128-bit integer into 4 32-bit integers, and then do the - // multiplications by hand as follow: - // lhs a b c d - // rhs e f g h - // ----------- - // ah bh ch dh - // bg cg dg - // cf df - // de - // The result is stored in 2 64bit integers, high and low. - - const uint64_t LOW = 0x00000000FFFFFFFFLL; - const uint64_t HIGH = 0xFFFFFFFF00000000LL; - - uint64_t d = lhs.low & LOW; - uint64_t c = (lhs.low & HIGH) >> 32LL; - uint64_t b = lhs.high & LOW; - uint64_t a = (lhs.high & HIGH) >> 32LL; - - uint64_t h = rhs.low & LOW; - uint64_t g = (rhs.low & HIGH) >> 32LL; - uint64_t f = rhs.high & LOW; - uint64_t e = (rhs.high & HIGH) >> 32LL; - - // Compute the low 32 bits of low - uint64_t acc = d * h; - uint64_t low = acc & LOW; - // Compute the high 32 bits of low. Add a carry every time we wrap around - acc >>= 32LL; - uint64_t carry = 0; - uint64_t acc2 = acc + c * h; - if (acc2 < acc) { - carry++; - } - acc = acc2 + d * g; - if (acc < acc2) { - carry++; - } - low |= (acc << 32LL); - - // Carry forward the high bits of acc to initiate the computation of the - // low 32 bits of high - acc2 = (acc >> 32LL) | (carry << 32LL); - carry = 0; - - acc = acc2 + b * h; - if (acc < acc2) { - carry++; - } - acc2 = acc + c * g; - if (acc2 < acc) { - carry++; - } - acc = acc2 + d * f; - if (acc < acc2) { - carry++; - } - uint64_t high = acc & LOW; - - // Start to compute the high 32 bits of high. 
- acc2 = (acc >> 32LL) | (carry << 32LL); - - acc = acc2 + a * h; - acc2 = acc + b * g; - acc = acc2 + c * f; - acc2 = acc + d * e; - high |= (acc2 << 32LL); - - return TensorUInt128<uint64_t, uint64_t>(high, low); -} - -template <typename HL, typename LL, typename HR, typename LR> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs) -{ - if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) { - return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); - } else if (lhs < rhs) { - return TensorUInt128<uint64_t, uint64_t>(0); - } else { - // calculate the biggest power of 2 times rhs that's less than or equal to lhs - TensorUInt128<uint64_t, uint64_t> power2(1); - TensorUInt128<uint64_t, uint64_t> d(rhs); - TensorUInt128<uint64_t, uint64_t> tmp(lhs - d); - while (lhs >= d) { - tmp = tmp - d; - d = d + d; - power2 = power2 + power2; - } - - tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low); - TensorUInt128<uint64_t, uint64_t> result(0); - while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) { - if (tmp >= d) { - tmp = tmp - d; - result = result + power2; - } - // Shift right - power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63)); - d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63)); - } - - return result; - } -} - - -} // namespace internal -} // namespace Eigen - - -#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h deleted file mode 100644 index 0ca2cac..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ /dev/null @@ -1,608 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H -#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H - -namespace Eigen { - -/** \class TensorVolumePatch - * \ingroup CXX11_Tensor_Module - * - * \brief Patch extraction specialized for processing of volumetric data. - * This assumes that the input has a least 4 dimensions ordered as follows: - * - channels - * - planes - * - rows - * - columns - * - (optional) additional dimensions such as time or batch size. - * Calling the volume patch code with patch_planes, patch_rows, and patch_cols - * is equivalent to calling the regular patch extraction code with parameters - * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional - * dimensions. 
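- * For example, with a column-major layout and an input ordered as (channels, planes, - * rows, cols), the extracted patches form a tensor with dimensions (channels, - * patch_planes, patch_rows, patch_cols, number of patches), followed by any remaining - * input dimensions.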
- */ -namespace internal { -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType> -{ - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef traits<XprType> XprTraits; - typedef typename XprTraits::StorageKind StorageKind; - typedef typename XprTraits::Index Index; - typedef typename XprType::Nested Nested; - typedef typename remove_reference<Nested>::type _Nested; - static const int NumDimensions = XprTraits::NumDimensions + 1; - static const int Layout = XprTraits::Layout; -}; - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense> -{ - typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type; -}; - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type> -{ - typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type; -}; - -} // end namespace internal - -template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> -class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors> -{ - public: - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar; - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested; - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind; - typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - PaddingType padding_type, Scalar padding_value) - : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), - m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), - m_padding_type(padding_type), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, - DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, - DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, - DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, - DenseIndex padding_top_z, DenseIndex padding_bottom_z, - DenseIndex padding_top, DenseIndex padding_bottom, - DenseIndex padding_left, DenseIndex 
padding_right, - Scalar padding_value) - : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), - m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), - m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), - m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), - m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), - m_padding_left(padding_left), m_padding_right(padding_right), - m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} - - EIGEN_DEVICE_FUNC - DenseIndex patch_planes() const { return m_patch_planes; } - EIGEN_DEVICE_FUNC - DenseIndex patch_rows() const { return m_patch_rows; } - EIGEN_DEVICE_FUNC - DenseIndex patch_cols() const { return m_patch_cols; } - EIGEN_DEVICE_FUNC - DenseIndex plane_strides() const { return m_plane_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_strides() const { return m_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_strides() const { return m_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_plane_strides() const { return m_in_plane_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_row_strides() const { return m_in_row_strides; } - EIGEN_DEVICE_FUNC - DenseIndex in_col_strides() const { return m_in_col_strides; } - EIGEN_DEVICE_FUNC - DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } - EIGEN_DEVICE_FUNC - DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } - EIGEN_DEVICE_FUNC - bool padding_explicit() const { return m_padding_explicit; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top_z() const { return m_padding_top_z; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom_z() const { return m_padding_bottom_z; } - EIGEN_DEVICE_FUNC - DenseIndex padding_top() const { return m_padding_top; } - EIGEN_DEVICE_FUNC - DenseIndex padding_bottom() const { return m_padding_bottom; } - EIGEN_DEVICE_FUNC - DenseIndex padding_left() const { return m_padding_left; } - EIGEN_DEVICE_FUNC - DenseIndex padding_right() const { return m_padding_right; } - EIGEN_DEVICE_FUNC - PaddingType padding_type() const { return m_padding_type; } - EIGEN_DEVICE_FUNC - Scalar padding_value() const { return m_padding_value; } - - EIGEN_DEVICE_FUNC - const typename internal::remove_all<typename XprType::Nested>::type& - expression() const { return m_xpr; } - - protected: - typename XprType::Nested m_xpr; - const DenseIndex m_patch_planes; - const DenseIndex m_patch_rows; - const DenseIndex m_patch_cols; - const DenseIndex m_plane_strides; - const DenseIndex m_row_strides; - const DenseIndex m_col_strides; - const DenseIndex m_in_plane_strides; - const DenseIndex m_in_row_strides; - const DenseIndex m_in_col_strides; - const DenseIndex m_plane_inflate_strides; - const DenseIndex m_row_inflate_strides; - const DenseIndex m_col_inflate_strides; - const bool m_padding_explicit; - const DenseIndex m_padding_top_z; - const DenseIndex m_padding_bottom_z; - const DenseIndex m_padding_top; - const DenseIndex m_padding_bottom; - const DenseIndex m_padding_left; - const DenseIndex m_padding_right; - const PaddingType m_padding_type; - const Scalar m_padding_value; -}; - - -// Eval as rvalue -template<DenseIndex Planes, DenseIndex Rows, 
DenseIndex Cols, typename ArgType, typename Device> -struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device> -{ - typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType; - typedef typename XprType::Index Index; - static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; - static const int NumDims = NumInputDims + 1; - typedef DSizes<Index, NumDims> Dimensions; - typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; - - enum { - IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, - BlockAccess = false, - Layout = TensorEvaluator<ArgType, Device>::Layout, - CoordAccess = false, - RawAccess = false - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) - { - EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE); - - m_paddingValue = op.padding_value(); - - const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); - - // Cache a few variables. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_inputDepth = input_dims[0]; - m_inputPlanes = input_dims[1]; - m_inputRows = input_dims[2]; - m_inputCols = input_dims[3]; - } else { - m_inputDepth = input_dims[NumInputDims-1]; - m_inputPlanes = input_dims[NumInputDims-2]; - m_inputRows = input_dims[NumInputDims-3]; - m_inputCols = input_dims[NumInputDims-4]; - } - - m_plane_strides = op.plane_strides(); - m_row_strides = op.row_strides(); - m_col_strides = op.col_strides(); - - // Input strides and effective input/patch size - m_in_plane_strides = op.in_plane_strides(); - m_in_row_strides = op.in_row_strides(); - m_in_col_strides = op.in_col_strides(); - m_plane_inflate_strides = op.plane_inflate_strides(); - m_row_inflate_strides = op.row_inflate_strides(); - m_col_inflate_strides = op.col_inflate_strides(); - - // The "effective" spatial size after inflating data with zeros. 
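- // For example, an input with 5 rows and a row inflate stride of 2 has an effective - // height of (5 - 1) * 2 + 1 = 9 rows.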
- m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1; - m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; - m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; - m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1); - m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); - m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); - - if (op.padding_explicit()) { - m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); - m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_planePaddingTop = op.padding_top_z(); - m_rowPaddingTop = op.padding_top(); - m_colPaddingLeft = op.padding_left(); - } else { - // Computing padding from the type - switch (op.padding_type()) { - case PADDING_VALID: - m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides)); - m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides)); - m_planePaddingTop = 0; - m_rowPaddingTop = 0; - m_colPaddingLeft = 0; - break; - case PADDING_SAME: { - m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast<float>(m_plane_strides)); - m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides)); - m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides)); - const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff; - const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff; - const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff; - m_planePaddingTop = dz - dz / 2; - m_rowPaddingTop = dy - dy / 2; - m_colPaddingLeft = dx - dx / 2; - break; - } - default: - eigen_assert(false && "unexpected padding"); - } - } - eigen_assert(m_outputRows > 0); - eigen_assert(m_outputCols > 0); - eigen_assert(m_outputPlanes > 0); - - // Dimensions for result of extraction. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - // ColMajor - // 0: depth - // 1: patch_planes - // 2: patch_rows - // 3: patch_cols - // 4: number of patches - // 5 and beyond: anything else (such as batch). - m_dimensions[0] = input_dims[0]; - m_dimensions[1] = op.patch_planes(); - m_dimensions[2] = op.patch_rows(); - m_dimensions[3] = op.patch_cols(); - m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols; - for (int i = 5; i < NumDims; ++i) { - m_dimensions[i] = input_dims[i-1]; - } - } else { - // RowMajor - // NumDims-1: depth - // NumDims-2: patch_planes - // NumDims-3: patch_rows - // NumDims-4: patch_cols - // NumDims-5: number of patches - // NumDims-6 and beyond: anything else (such as batch). 
- m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; - m_dimensions[NumDims-2] = op.patch_planes(); - m_dimensions[NumDims-3] = op.patch_rows(); - m_dimensions[NumDims-4] = op.patch_cols(); - m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols; - for (int i = NumDims-6; i >= 0; --i) { - m_dimensions[i] = input_dims[i]; - } - } - - // Strides for the output tensor. - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_rowStride = m_dimensions[1]; - m_colStride = m_dimensions[2] * m_rowStride; - m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0]; - m_otherStride = m_patchStride * m_dimensions[4]; - } else { - m_rowStride = m_dimensions[NumDims-2]; - m_colStride = m_dimensions[NumDims-3] * m_rowStride; - m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1]; - m_otherStride = m_patchStride * m_dimensions[NumDims-5]; - } - - // Strides for navigating through the input tensor. - m_planeInputStride = m_inputDepth; - m_rowInputStride = m_inputDepth * m_inputPlanes; - m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; - m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; - - m_outputPlanesRows = m_outputPlanes * m_outputRows; - - // Fast representations of different variables. - m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride); - m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride); - m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); - m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride); - m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides); - m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides); - m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides); - m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff); - m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes); - m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows); - - if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]); - } else { - m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]); - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_impl.cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - // Patch index corresponding to the passed in index. - const Index patchIndex = index / m_fastPatchStride; - - // Spatial offset within the patch. This has to be translated into 3D - // coordinates within the patch. - const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; - - // Batch, etc. - const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride; - const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; - - // Calculate column index in the input original tensor. 
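- // Within patch3DIndex the plane index varies fastest, then the row index, then the - // column index, so the column index is the quotient by outputPlanes * outputRows.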
- const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; - const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); - if (inputCol < 0 || inputCol >= m_input_cols_eff || - ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate row index in the original input tensor. - const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; - const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride; - const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; - const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (inputRow < 0 || inputRow >= m_input_rows_eff || - ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { - return Scalar(m_paddingValue); - } - - // Calculate plane index in the original input tensor. - const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); - const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; - const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; - const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0); - if (inputPlane < 0 || inputPlane >= m_input_planes_eff || - ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { - return Scalar(m_paddingValue); - } - - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - - const Index inputIndex = depth + - origInputRow * m_rowInputStride + - origInputCol * m_colInputStride + - origInputPlane * m_planeInputStride + - otherIndex * m_otherInputStride; - - return m_impl.coeff(inputIndex); - } - - template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); - - if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || - m_in_plane_strides != 1 || m_plane_inflate_strides != 1) { - return packetWithPossibleZero(index); - } - - const Index indices[2] = {index, index + PacketSize - 1}; - const Index patchIndex = indices[0] / m_fastPatchStride; - if (patchIndex != indices[1] / m_fastPatchStride) { - return packetWithPossibleZero(index); - } - const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride; - eigen_assert(otherIndex == indices[1] / m_fastOtherStride); - - // Find the offset of the element wrt the location of the first element. - const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, - (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; - - const Index patch3DIndex = (NumDims == 5) ? 
patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; - eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); - - const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; - const Index colOffsets[2] = { - patchOffsets[0] / m_fastColStride, - patchOffsets[1] / m_fastColStride}; - - // Calculate col indices in the original input tensor. - const Index inputCols[2] = { - colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft, - colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; - if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputCols[0] != inputCols[1]) { - return packetWithPossibleZero(index); - } - - const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; - const Index rowOffsets[2] = { - (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride, - (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride}; - eigen_assert(rowOffsets[0] <= rowOffsets[1]); - // Calculate col indices in the original input tensor. - const Index inputRows[2] = { - rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop, - rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; - - if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputRows[0] != inputRows[1]) { - return packetWithPossibleZero(index); - } - - const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); - const Index planeOffsets[2] = { - patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride, - patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride}; - eigen_assert(planeOffsets[0] <= planeOffsets[1]); - const Index inputPlanes[2] = { - planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop, - planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop}; - - if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) { - return internal::pset1<PacketReturnType>(Scalar(m_paddingValue)); - } - - if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) { - // no padding - const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 
0 : NumDims - 1; - const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; - const Index inputIndex = depth + - inputRows[0] * m_rowInputStride + - inputCols[0] * m_colInputStride + - m_planeInputStride * inputPlanes[0] + - otherIndex * m_otherInputStride; - return m_impl.template packet<Unaligned>(inputIndex); - } - - return packetWithPossibleZero(index); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost - costPerCoeff(bool vectorized) const { - const double compute_cost = - 10 * TensorOpCost::DivCost<Index>() + 21 * TensorOpCost::MulCost<Index>() + - 8 * TensorOpCost::AddCost<Index>(); - return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } - - Index planePaddingTop() const { return m_planePaddingTop; } - Index rowPaddingTop() const { return m_rowPaddingTop; } - Index colPaddingLeft() const { return m_colPaddingLeft; } - Index outputPlanes() const { return m_outputPlanes; } - Index outputRows() const { return m_outputRows; } - Index outputCols() const { return m_outputCols; } - Index userPlaneStride() const { return m_plane_strides; } - Index userRowStride() const { return m_row_strides; } - Index userColStride() const { return m_col_strides; } - Index userInPlaneStride() const { return m_in_plane_strides; } - Index userInRowStride() const { return m_in_row_strides; } - Index userInColStride() const { return m_in_col_strides; } - Index planeInflateStride() const { return m_plane_inflate_strides; } - Index rowInflateStride() const { return m_row_inflate_strides; } - Index colInflateStride() const { return m_col_inflate_strides; } - - protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const - { - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = coeff(index+i); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - - Dimensions m_dimensions; - - // Parameters passed to the costructor. - Index m_plane_strides; - Index m_row_strides; - Index m_col_strides; - - Index m_outputPlanes; - Index m_outputRows; - Index m_outputCols; - - Index m_planePaddingTop; - Index m_rowPaddingTop; - Index m_colPaddingLeft; - - Index m_in_plane_strides; - Index m_in_row_strides; - Index m_in_col_strides; - - Index m_plane_inflate_strides; - Index m_row_inflate_strides; - Index m_col_inflate_strides; - - // Cached input size. - Index m_inputDepth; - Index m_inputPlanes; - Index m_inputRows; - Index m_inputCols; - - // Other cached variables. - Index m_outputPlanesRows; - - // Effective input/patch post-inflation size. - Index m_input_planes_eff; - Index m_input_rows_eff; - Index m_input_cols_eff; - Index m_patch_planes_eff; - Index m_patch_rows_eff; - Index m_patch_cols_eff; - - // Strides for the output tensor. - Index m_otherStride; - Index m_patchStride; - Index m_rowStride; - Index m_colStride; - - // Strides for the input tensor. 
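- // These are the distances, in coefficients, between consecutive planes, rows, columns - // and batches of the input, with the depth (channel) dimension stored innermost.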
- Index m_planeInputStride; - Index m_rowInputStride; - Index m_colInputStride; - Index m_otherInputStride; - - internal::TensorIntDivisor<Index> m_fastOtherStride; - internal::TensorIntDivisor<Index> m_fastPatchStride; - internal::TensorIntDivisor<Index> m_fastColStride; - internal::TensorIntDivisor<Index> m_fastRowStride; - internal::TensorIntDivisor<Index> m_fastInputPlaneStride; - internal::TensorIntDivisor<Index> m_fastInputRowStride; - internal::TensorIntDivisor<Index> m_fastInputColStride; - internal::TensorIntDivisor<Index> m_fastInputColsEff; - internal::TensorIntDivisor<Index> m_fastOutputPlanesRows; - internal::TensorIntDivisor<Index> m_fastOutputPlanes; - internal::TensorIntDivisor<Index> m_fastOutputDepth; - - Scalar m_paddingValue; - - TensorEvaluator<ArgType, Device> m_impl; -}; - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h deleted file mode 100644 index bc4f202..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h +++ /dev/null @@ -1,293 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H - -namespace Eigen { - -class DynamicSGroup -{ - public: - inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); } - inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { } - inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); } - inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } - inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; } - - void add(int one, int two, int flags = 0); - - template<typename Gen_> - inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); } - inline void addSymmetry(int one, int two) { add(one, two, 0); } - inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); } - inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); } - inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); } - - template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> - inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... 
args) const - { - eigen_assert(N >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); - for (std::size_t i = 0; i < size(); i++) - initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list<int, N>::type()), m_elements[i].flags, initial, std::forward<Args>(args)...); - return initial; - } - - template<typename Op, typename RV, typename Index, typename... Args> - inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) const - { - eigen_assert(idx.size() >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); - for (std::size_t i = 0; i < size(); i++) - initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward<Args>(args)...); - return initial; - } - - inline int globalFlags() const { return m_globalFlags; } - inline std::size_t size() const { return m_elements.size(); } - - template<typename Tensor_, typename... IndexTypes> - inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const - { - static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); - } - - template<typename Tensor_> - inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const - { - return internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup>(tensor, *this, indices); - } - private: - struct GroupElement { - std::vector<int> representation; - int flags; - bool isId() const - { - for (std::size_t i = 0; i < representation.size(); i++) - if (i != (size_t)representation[i]) - return false; - return true; - } - }; - struct Generator { - int one; - int two; - int flags; - constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {} - }; - - std::size_t m_numIndices; - std::vector<GroupElement> m_elements; - std::vector<Generator> m_generators; - int m_globalFlags; - - template<typename Index, std::size_t N, int... n> - inline std::array<Index, N> h_permute(std::size_t which, const std::array<Index, N>& idx, internal::numeric_list<int, n...>) const - { - return std::array<Index, N>{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... 
}}; - } - - template<typename Index> - inline std::vector<Index> h_permute(std::size_t which, std::vector<Index> idx) const - { - std::vector<Index> result; - result.reserve(idx.size()); - for (auto k : m_elements[which].representation) - result.push_back(idx[k]); - for (std::size_t i = m_numIndices; i < idx.size(); i++) - result.push_back(idx[i]); - return result; - } - - inline GroupElement ge(Generator const& g) const - { - GroupElement result; - result.representation.reserve(m_numIndices); - result.flags = g.flags; - for (std::size_t k = 0; k < m_numIndices; k++) { - if (k == (std::size_t)g.one) - result.representation.push_back(g.two); - else if (k == (std::size_t)g.two) - result.representation.push_back(g.one); - else - result.representation.push_back(int(k)); - } - return result; - } - - GroupElement mul(GroupElement, GroupElement) const; - inline GroupElement mul(Generator g1, GroupElement g2) const - { - return mul(ge(g1), g2); - } - - inline GroupElement mul(GroupElement g1, Generator g2) const - { - return mul(g1, ge(g2)); - } - - inline GroupElement mul(Generator g1, Generator g2) const - { - return mul(ge(g1), ge(g2)); - } - - inline int findElement(GroupElement e) const - { - for (auto ee : m_elements) { - if (ee.representation == e.representation) - return ee.flags ^ e.flags; - } - return -1; - } - - void updateGlobalFlags(int flagDiffOfSameGenerator); -}; - -// dynamic symmetry group that auto-adds the template parameters in the constructor -template<typename... Gen> -class DynamicSGroupFromTemplateArgs : public DynamicSGroup -{ - public: - inline DynamicSGroupFromTemplateArgs() : DynamicSGroup() - { - add_all(internal::type_list<Gen...>()); - } - inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { } - inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(other) { } - inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(const DynamicSGroupFromTemplateArgs<Gen...>& o) { DynamicSGroup::operator=(o); return *this; } - inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(DynamicSGroupFromTemplateArgs<Gen...>&& o) { DynamicSGroup::operator=(o); return *this; } - - private: - template<typename Gen1, typename... GenNext> - inline void add_all(internal::type_list<Gen1, GenNext...>) - { - add(Gen1()); - add_all(internal::type_list<GenNext...>()); - } - - inline void add_all(internal::type_list<>) - { - } -}; - -inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, GroupElement g2) const -{ - eigen_internal_assert(g1.representation.size() == m_numIndices); - eigen_internal_assert(g2.representation.size() == m_numIndices); - - GroupElement result; - result.representation.reserve(m_numIndices); - for (std::size_t i = 0; i < m_numIndices; i++) { - int v = g2.representation[g1.representation[i]]; - eigen_assert(v >= 0); - result.representation.push_back(v); - } - result.flags = g1.flags ^ g2.flags; - return result; -} - -inline void DynamicSGroup::add(int one, int two, int flags) -{ - eigen_assert(one >= 0); - eigen_assert(two >= 0); - eigen_assert(one != two); - - if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) { - std::size_t newNumIndices = (one > two) ? 
one : two + 1; - for (auto& gelem : m_elements) { - gelem.representation.reserve(newNumIndices); - for (std::size_t i = m_numIndices; i < newNumIndices; i++) - gelem.representation.push_back(i); - } - m_numIndices = newNumIndices; - } - - Generator g{one, two, flags}; - GroupElement e = ge(g); - - /* special case for first generator */ - if (m_elements.size() == 1) { - while (!e.isId()) { - m_elements.push_back(e); - e = mul(e, g); - } - - if (e.flags > 0) - updateGlobalFlags(e.flags); - - // only add in case we didn't have identity - if (m_elements.size() > 1) - m_generators.push_back(g); - return; - } - - int p = findElement(e); - if (p >= 0) { - updateGlobalFlags(p); - return; - } - - std::size_t coset_order = m_elements.size(); - m_elements.push_back(e); - for (std::size_t i = 1; i < coset_order; i++) - m_elements.push_back(mul(m_elements[i], e)); - m_generators.push_back(g); - - std::size_t coset_rep = coset_order; - do { - for (auto g : m_generators) { - e = mul(m_elements[coset_rep], g); - p = findElement(e); - if (p < 0) { - // element not yet in group - m_elements.push_back(e); - for (std::size_t i = 1; i < coset_order; i++) - m_elements.push_back(mul(m_elements[i], e)); - } else if (p > 0) { - updateGlobalFlags(p); - } - } - coset_rep += coset_order; - } while (coset_rep < m_elements.size()); -} - -inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator) -{ - switch (flagDiffOfSameGenerator) { - case 0: - default: - // nothing happened - break; - case NegationFlag: - // every element is it's own negative => whole tensor is zero - m_globalFlags |= GlobalZeroFlag; - break; - case ConjugationFlag: - // every element is it's own conjugate => whole tensor is real - m_globalFlags |= GlobalRealFlag; - break; - case (NegationFlag | ConjugationFlag): - // every element is it's own negative conjugate => whole tensor is imaginary - m_globalFlags |= GlobalImagFlag; - break; - /* NOTE: - * since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if one generator - * causes the tensor to be real and the next one to be imaginary, this will - * trivially give the correct result - */ - } -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h deleted file mode 100644 index 942293b..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h +++ /dev/null @@ -1,236 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H - -namespace Eigen { - -namespace internal { - -template<typename list> struct tensor_static_symgroup_permutate; - -template<int... 
nn> -struct tensor_static_symgroup_permutate<numeric_list<int, nn...>> -{ - constexpr static std::size_t N = sizeof...(nn); - - template<typename T> - constexpr static inline std::array<T, N> run(const std::array<T, N>& indices) - { - return {{indices[nn]...}}; - } -}; - -template<typename indices_, int flags_> -struct tensor_static_symgroup_element -{ - typedef indices_ indices; - constexpr static int flags = flags_; -}; - -template<typename Gen, int N> -struct tensor_static_symgroup_element_ctor -{ - typedef tensor_static_symgroup_element< - typename gen_numeric_list_swapped_pair<int, N, Gen::One, Gen::Two>::type, - Gen::Flags - > type; -}; - -template<int N> -struct tensor_static_symgroup_identity_ctor -{ - typedef tensor_static_symgroup_element< - typename gen_numeric_list<int, N>::type, - 0 - > type; -}; - -template<typename iib> -struct tensor_static_symgroup_multiply_helper -{ - template<int... iia> - constexpr static inline numeric_list<int, get<iia, iib>::value...> helper(numeric_list<int, iia...>) { - return numeric_list<int, get<iia, iib>::value...>(); - } -}; - -template<typename A, typename B> -struct tensor_static_symgroup_multiply -{ - private: - typedef typename A::indices iia; - typedef typename B::indices iib; - constexpr static int ffa = A::flags; - constexpr static int ffb = B::flags; - - public: - static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices."); - - typedef tensor_static_symgroup_element< - decltype(tensor_static_symgroup_multiply_helper<iib>::helper(iia())), - ffa ^ ffb - > type; -}; - -template<typename A, typename B> -struct tensor_static_symgroup_equality -{ - typedef typename A::indices iia; - typedef typename B::indices iib; - constexpr static int ffa = A::flags; - constexpr static int ffb = B::flags; - static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices."); - - constexpr static bool value = is_same<iia, iib>::value; - - private: - /* this should be zero if they are identical, or else the tensor - * will be forced to be pure real, pure imaginary or even pure zero - */ - constexpr static int flags_cmp_ = ffa ^ ffb; - - /* either they are not equal, then we don't care whether the flags - * match, or they are equal, and then we have to check - */ - constexpr static bool is_zero = value && flags_cmp_ == NegationFlag; - constexpr static bool is_real = value && flags_cmp_ == ConjugationFlag; - constexpr static bool is_imag = value && flags_cmp_ == (NegationFlag | ConjugationFlag); - - public: - constexpr static int global_flags = - (is_real ? GlobalRealFlag : 0) | - (is_imag ? GlobalImagFlag : 0) | - (is_zero ? GlobalZeroFlag : 0); -}; - -template<std::size_t NumIndices, typename... Gen> -struct tensor_static_symgroup -{ - typedef StaticSGroup<Gen...> type; - constexpr static std::size_t size = type::static_size; -}; - -template<typename Index, std::size_t N, int... ii, int... jj> -constexpr static inline std::array<Index, N> tensor_static_symgroup_index_permute(std::array<Index, N> idx, internal::numeric_list<int, ii...>, internal::numeric_list<int, jj...>) -{ - return {{ idx[ii]..., idx[jj]... }}; -} - -template<typename Index, int... ii> -static inline std::vector<Index> tensor_static_symgroup_index_permute(std::vector<Index> idx, internal::numeric_list<int, ii...>) -{ - std::vector<Index> result{{ idx[ii]... 
}}; - std::size_t target_size = idx.size(); - for (std::size_t i = result.size(); i < target_size; i++) - result.push_back(idx[i]); - return result; -} - -template<typename T> struct tensor_static_symgroup_do_apply; - -template<typename first, typename... next> -struct tensor_static_symgroup_do_apply<internal::type_list<first, next...>> -{ - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> - static inline RV run(const std::array<Index, NumIndices>& idx, RV initial, Args&&... args) - { - static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices."); - typedef typename internal::gen_numeric_list<int, NumIndices - SGNumIndices, SGNumIndices>::type remaining_indices; - initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward<Args>(args)...); - return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); - } - - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> - static inline RV run(const std::vector<Index>& idx, RV initial, Args&&... args) - { - eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices."); - initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward<Args>(args)...); - return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...); - } -}; - -template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, empty)> -struct tensor_static_symgroup_do_apply<internal::type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>> -{ - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args> - static inline RV run(const std::array<Index, NumIndices>&, RV initial, Args&&...) - { - // do nothing - return initial; - } - - template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args> - static inline RV run(const std::vector<Index>&, RV initial, Args&&...) - { - // do nothing - return initial; - } -}; - -} // end namespace internal - -template<typename... Gen> -class StaticSGroup -{ - constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; - typedef internal::group_theory::enumerate_group_elements< - internal::tensor_static_symgroup_multiply, - internal::tensor_static_symgroup_equality, - typename internal::tensor_static_symgroup_identity_ctor<NumIndices>::type, - internal::type_list<typename internal::tensor_static_symgroup_element_ctor<Gen, NumIndices>::type...> - > group_elements; - typedef typename group_elements::type ge; - public: - constexpr inline StaticSGroup() {} - constexpr inline StaticSGroup(const StaticSGroup<Gen...>&) {} - constexpr inline StaticSGroup(StaticSGroup<Gen...>&&) {} - - template<typename Op, typename RV, typename Index, std::size_t N, typename... Args> - static inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) - { - return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); - } - - template<typename Op, typename RV, typename Index, typename... Args> - static inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... 
args) - { - eigen_assert(idx.size() == NumIndices); - return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...); - } - - constexpr static std::size_t static_size = ge::count; - - constexpr static inline std::size_t size() { - return ge::count; - } - constexpr static inline int globalFlags() { return group_elements::global_flags; } - - template<typename Tensor_, typename... IndexTypes> - inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const - { - static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); - return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}}); - } - - template<typename Tensor_> - inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const - { - return internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>>(tensor, *this, indices); - } -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h deleted file mode 100644 index 879d6cd..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h +++ /dev/null @@ -1,338 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H -#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H - -namespace Eigen { - -enum { - NegationFlag = 0x01, - ConjugationFlag = 0x02 -}; - -enum { - GlobalRealFlag = 0x01, - GlobalImagFlag = 0x02, - GlobalZeroFlag = 0x03 -}; - -namespace internal { - -template<std::size_t NumIndices, typename... Sym> struct tensor_symmetry_pre_analysis; -template<std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup; -template<bool instantiate, std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup_if; -template<typename Tensor_> struct tensor_symmetry_calculate_flags; -template<typename Tensor_> struct tensor_symmetry_assign_value; -template<typename... 
Sym> struct tensor_symmetry_num_indices; - -} // end namespace internal - -template<int One_, int Two_> -struct Symmetry -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = 0; -}; - -template<int One_, int Two_> -struct AntiSymmetry -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = NegationFlag; -}; - -template<int One_, int Two_> -struct Hermiticity -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = ConjugationFlag; -}; - -template<int One_, int Two_> -struct AntiHermiticity -{ - static_assert(One_ != Two_, "Symmetries must cover distinct indices."); - constexpr static int One = One_; - constexpr static int Two = Two_; - constexpr static int Flags = ConjugationFlag | NegationFlag; -}; - -/** \class DynamicSGroup - * \ingroup TensorSymmetry_Module - * - * \brief Dynamic symmetry group - * - * The %DynamicSGroup class represents a symmetry group that need not be known at - * compile time. It is useful if one wants to support arbitrary run-time defineable - * symmetries for tensors, but it is also instantiated if a symmetry group is defined - * at compile time that would be either too large for the compiler to reasonably - * generate (using templates to calculate this at compile time is very inefficient) - * or that the compiler could generate the group but that it wouldn't make sense to - * unroll the loop for setting coefficients anymore. - */ -class DynamicSGroup; - -/** \internal - * - * \class DynamicSGroupFromTemplateArgs - * \ingroup TensorSymmetry_Module - * - * \brief Dynamic symmetry group, initialized from template arguments - * - * This class is a child class of DynamicSGroup. It uses the template arguments - * specified to initialize itself. - */ -template<typename... Gen> -class DynamicSGroupFromTemplateArgs; - -/** \class StaticSGroup - * \ingroup TensorSymmetry_Module - * - * \brief Static symmetry group - * - * This class represents a symmetry group that is known and resolved completely - * at compile time. Ideally, no run-time penalty is incurred compared to the - * manual unrolling of the symmetry. - * - * <b><i>CAUTION:</i></b> - * - * Do not use this class directly for large symmetry groups. The compiler - * may run into a limit, or segfault or in the very least will take a very, - * very, very long time to compile the code. Use the SGroup class instead - * if you want a static group. That class contains logic that will - * automatically select the DynamicSGroup class instead if the symmetry - * group becomes too large. (In that case, unrolling may not even be - * beneficial.) - */ -template<typename... Gen> -class StaticSGroup; - -/** \class SGroup - * \ingroup TensorSymmetry_Module - * - * \brief Symmetry group, initialized from template arguments - * - * This class represents a symmetry group whose generators are already - * known at compile time. It may or may not be resolved at compile time, - * depending on the estimated size of the group. - * - * \sa StaticSGroup - * \sa DynamicSGroup - */ -template<typename... 
Gen> -class SGroup : public internal::tensor_symmetry_pre_analysis<internal::tensor_symmetry_num_indices<Gen...>::value, Gen...>::root_type -{ - public: - constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value; - typedef typename internal::tensor_symmetry_pre_analysis<NumIndices, Gen...>::root_type Base; - - // make standard constructors + assignment operators public - inline SGroup() : Base() { } - inline SGroup(const SGroup<Gen...>& other) : Base(other) { } - inline SGroup(SGroup<Gen...>&& other) : Base(other) { } - inline SGroup<Gen...>& operator=(const SGroup<Gen...>& other) { Base::operator=(other); return *this; } - inline SGroup<Gen...>& operator=(SGroup<Gen...>&& other) { Base::operator=(other); return *this; } - - // all else is defined in the base class -}; - -namespace internal { - -template<typename... Sym> struct tensor_symmetry_num_indices -{ - constexpr static std::size_t value = 1; -}; - -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> -{ -private: - constexpr static std::size_t One = static_cast<std::size_t>(One_); - constexpr static std::size_t Two = static_cast<std::size_t>(Two_); - constexpr static std::size_t Three = tensor_symmetry_num_indices<Sym...>::value; - - // don't use std::max, since it's not constexpr until C++14... - constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1; -public: - constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? maxOneTwoPlusOne : Three; -}; - -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiSymmetry<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Hermiticity<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; -template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiHermiticity<One_, Two_>, Sym...> - : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {}; - -/** \internal - * - * \class tensor_symmetry_pre_analysis - * \ingroup TensorSymmetry_Module - * - * \brief Pre-select whether to use a static or dynamic symmetry group - * - * When a symmetry group could in principle be determined at compile time, - * this template implements the logic whether to actually do that or whether - * to rather defer that to runtime. - * - * The logic is as follows: - * <dl> - * <dt><b>No generators (trivial symmetry):</b></dt> - * <dd>Use a trivial static group. Ideally, this has no performance impact - * compared to not using symmetry at all. In practice, this might not - * be the case.</dd> - * <dt><b>More than 4 generators:</b></dt> - * <dd>Calculate the group at run time, it is likely far too large for the - * compiler to be able to properly generate it in a realistic time.</dd> - * <dt><b>Up to and including 4 generators:</b></dt> - * <dd>Actually enumerate all group elements, but then check how many there - * are. If there are more than 16, it is unlikely that unrolling the - * loop (as is done in the static compile-time case) is sensible, so - * use a dynamic group instead. If there are at most 16 elements, actually - * use that static group. 
Note that the largest group with 4 generators - * still compiles with reasonable resources.</dd> - * </dl> - * - * Note: Example compile time performance with g++-4.6 on an Intenl Core i5-3470 - * with 16 GiB RAM (all generators non-redundant and the subgroups don't - * factorize): - * - * # Generators -O0 -ggdb -O2 - * ------------------------------------------------------------------- - * 1 0.5 s / 250 MiB 0.45s / 230 MiB - * 2 0.5 s / 260 MiB 0.5 s / 250 MiB - * 3 0.65s / 310 MiB 0.62s / 310 MiB - * 4 2.2 s / 860 MiB 1.7 s / 770 MiB - * 5 130 s / 13000 MiB 120 s / 11000 MiB - * - * It is clear that everything is still very efficient up to 4 generators, then - * the memory and CPU requirements become unreasonable. Thus we only instantiate - * the template group theory logic if the number of generators supplied is 4 or - * lower, otherwise this will be forced to be done during runtime, where the - * algorithm is reasonably fast. - */ -template<std::size_t NumIndices> -struct tensor_symmetry_pre_analysis<NumIndices> -{ - typedef StaticSGroup<> root_type; -}; - -template<std::size_t NumIndices, typename Gen_, typename... Gens_> -struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...> -{ - constexpr static std::size_t max_static_generators = 4; - constexpr static std::size_t max_static_elements = 16; - typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper; - constexpr static std::size_t possible_size = helper::size; - - typedef typename conditional< - possible_size == 0 || possible_size >= max_static_elements, - DynamicSGroupFromTemplateArgs<Gen_, Gens_...>, - typename helper::type - >::type root_type; -}; - -template<bool instantiate, std::size_t NumIndices, typename... Gens> -struct tensor_static_symgroup_if -{ - constexpr static std::size_t size = 0; - typedef void type; -}; - -template<std::size_t NumIndices, typename... 
Gens> -struct tensor_static_symgroup_if<true, NumIndices, Gens...> : tensor_static_symgroup<NumIndices, Gens...> {}; - -template<typename Tensor_> -struct tensor_symmetry_assign_value -{ - typedef typename Tensor_::Index Index; - typedef typename Tensor_::Scalar Scalar; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_) - { - Scalar value(value_); - if (transformation_flags & ConjugationFlag) - value = numext::conj(value); - if (transformation_flags & NegationFlag) - value = -value; - tensor.coeffRef(transformed_indices) = value; - return dummy; - } -}; - -template<typename Tensor_> -struct tensor_symmetry_calculate_flags -{ - typedef typename Tensor_::Index Index; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transform_flags, int current_flags, const std::array<Index, NumIndices>& orig_indices) - { - if (transformed_indices == orig_indices) { - if (transform_flags & (ConjugationFlag | NegationFlag)) - return current_flags | GlobalImagFlag; // anti-hermitian diagonal - else if (transform_flags & ConjugationFlag) - return current_flags | GlobalRealFlag; // hermitian diagonal - else if (transform_flags & NegationFlag) - return current_flags | GlobalZeroFlag; // anti-symmetric diagonal - } - return current_flags; - } -}; - -template<typename Tensor_, typename Symmetry_, int Flags = 0> -class tensor_symmetry_value_setter -{ - public: - typedef typename Tensor_::Index Index; - typedef typename Tensor_::Scalar Scalar; - constexpr static std::size_t NumIndices = Tensor_::NumIndices; - - inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array<Index, NumIndices> const& indices) - : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { } - - inline tensor_symmetry_value_setter<Tensor_, Symmetry_, Flags>& operator=(Scalar const& value) - { - doAssign(value); - return *this; - } - private: - Tensor_& m_tensor; - Symmetry_ m_symmetry; - std::array<Index, NumIndices> m_indices; - - inline void doAssign(Scalar const& value) - { - #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES - int value_flags = m_symmetry.template apply<internal::tensor_symmetry_calculate_flags<Tensor_>, int>(m_indices, m_symmetry.globalFlags(), m_indices); - if (value_flags & GlobalRealFlag) - eigen_assert(numext::imag(value) == 0); - if (value_flags & GlobalImagFlag) - eigen_assert(numext::real(value) == 0); - #endif - m_symmetry.template apply<internal::tensor_symmetry_assign_value<Tensor_>, int>(m_indices, 0, m_tensor, value); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h deleted file mode 100644 index 5e97d07..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h +++ /dev/null @@ -1,669 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H -#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H - -namespace Eigen { - -namespace internal { - -namespace group_theory { - -/** \internal - * \file CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h - * This file contains C++ templates that implement group theory algorithms. - * - * The algorithms allow for a compile-time analysis of finite groups. - * - * Currently only Dimino's algorithm is implemented, which returns a list - * of all elements in a group given a set of (possibly redundant) generators. - * (One could also do that with the so-called orbital algorithm, but that - * is much more expensive and usually has no advantages.) - */ - -/********************************************************************** - * "Ok kid, here is where it gets complicated." - * - Amelia Pond in the "Doctor Who" episode - * "The Big Bang" - * - * Dimino's algorithm - * ================== - * - * The following is Dimino's algorithm in sequential form: - * - * Input: identity element, list of generators, equality check, - * multiplication operation - * Output: list of group elements - * - * 1. add identity element - * 2. remove identities from list of generators - * 3. add all powers of first generator that aren't the - * identity element - * 4. go through all remaining generators: - * a. if generator is already in the list of elements - * -> do nothing - * b. otherwise - * i. remember current # of elements - * (i.e. the size of the current subgroup) - * ii. add all current elements (which includes - * the identity) each multiplied from right - * with the current generator to the group - * iii. add all remaining cosets that are generated - * by products of the new generator with itself - * and all other generators seen so far - * - * In functional form, this is implemented as a long set of recursive - * templates that have a complicated relationship. - * - * The main interface for Dimino's algorithm is the template - * enumerate_group_elements. All lists are implemented as variadic - * type_list<typename...> and numeric_list<typename = int, int...> - * templates. - * - * 'Calling' templates is usually done via typedefs. - * - * This algorithm is an extended version of the basic version. The - * extension consists in the fact that each group element has a set - * of flags associated with it. Multiplication of two group elements - * with each other results in a group element whose flags are the - * XOR of the flags of the previous elements. Each time the algorithm - * notices that a group element it just calculated is already in the - * list of current elements, the flags of both will be compared and - * added to the so-called 'global flags' of the group. - * - * The rationale behind this extension is that this allows not only - * for the description of symmetries between tensor indices, but - * also allows for the description of hermiticity, antisymmetry and - * antihermiticity. Negation and conjugation each are specific bit - * in the flags value and if two different ways to reach a group - * element lead to two different flags, this poses a constraint on - * the allowed values of the resulting tensor. For example, if a - * group element is reach both with and without the conjugation - * flags, it is clear that the resulting tensor has to be real. 
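The sequential form above maps naturally to a short run-time routine. The following is a minimal, self-contained sketch (illustrative only, not Eigen code): group elements are permutations stored as std::vector<int>, and the numbered comments correspond to the steps listed in the outline.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

typedef std::vector<int> Perm;  // p[i] = image of index i

// Compose two permutations: (a*b)[i] = a[b[i]].
static Perm multiply(const Perm& a, const Perm& b) {
  Perm r(a.size());
  for (std::size_t i = 0; i < a.size(); ++i) r[i] = a[b[i]];
  return r;
}

static bool contains(const std::vector<Perm>& v, const Perm& p) {
  return std::find(v.begin(), v.end(), p) != v.end();
}

static std::vector<Perm> dimino(std::vector<Perm> gens, std::size_t n) {
  Perm id(n);
  for (std::size_t i = 0; i < n; ++i) id[i] = static_cast<int>(i);

  std::vector<Perm> elements(1, id);                                  // step 1
  gens.erase(std::remove(gens.begin(), gens.end(), id), gens.end());  // step 2
  if (gens.empty()) return elements;

  for (Perm p = gens[0]; p != id; p = multiply(p, gens[0]))           // step 3
    elements.push_back(p);

  for (std::size_t k = 1; k < gens.size(); ++k) {                     // step 4
    if (contains(elements, gens[k])) continue;                        // 4a
    std::size_t subgroup_size = elements.size();                      // 4b-i
    // 4b-ii: add the first coset, i.e. all current elements (including the
    // identity) multiplied from the right with the new generator.
    for (std::size_t i = 0; i < subgroup_size; ++i)
      elements.push_back(multiply(elements[i], gens[k]));
    // 4b-iii: keep multiplying coset representatives by all generators seen
    // so far, adding a whole new coset whenever the product is not yet known.
    for (std::size_t rep = subgroup_size; rep < elements.size();
         rep += subgroup_size) {
      for (std::size_t g = 0; g <= k; ++g) {
        Perm candidate = multiply(elements[rep], gens[g]);
        if (contains(elements, candidate)) continue;
        for (std::size_t i = 0; i < subgroup_size; ++i)
          elements.push_back(multiply(elements[i], candidate));
      }
    }
  }
  return elements;
}

int main() {
  // Generators of S3: the transpositions (0 1) and (1 2).
  std::vector<Perm> gens;
  Perm s01(3); s01[0] = 1; s01[1] = 0; s01[2] = 2;
  Perm s12(3); s12[0] = 0; s12[1] = 2; s12[2] = 1;
  gens.push_back(s01); gens.push_back(s12);
  std::cout << dimino(gens, 3).size() << "\n";  // prints 6
}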
- * - * Note that this flag mechanism is quite generic and may have other - * uses beyond tensor properties. - * - * IMPORTANT: - * This algorithm assumes the group to be finite. If you try to - * run it with a group that's infinite, the algorithm will only - * terminate once you hit a compiler limit (max template depth). - * Also note that trying to use this implementation to create a - * very large group will probably either make you hit the same - * limit, cause the compiler to segfault or at the very least - * take a *really* long time (hours, days, weeks - sic!) to - * compile. It is not recommended to plug in more than 4 - * generators, unless they are independent of each other. - */ - -/** \internal - * - * \class strip_identities - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Cleanse a list of group elements of the identity element - * - * This template is used to make a first pass through all initial - * generators of Dimino's algorithm and remove the identity - * elements. - * - * \sa enumerate_group_elements - */ -template<template<typename, typename> class Equality, typename id, typename L> struct strip_identities; - -template< - template<typename, typename> class Equality, - typename id, - typename t, - typename... ts -> -struct strip_identities<Equality, id, type_list<t, ts...>> -{ - typedef typename conditional< - Equality<id, t>::value, - typename strip_identities<Equality, id, type_list<ts...>>::type, - typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type - >::type type; - constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags; -}; - -template< - template<typename, typename> class Equality, - typename id - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts) -> -struct strip_identities<Equality, id, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(ts)>> -{ - typedef type_list<> type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_first_step_elements_helper - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template that adds powers of the first generator to the list of group elements - * - * This template calls itself recursively to add powers of the first - * generator to the list of group elements. It stops if it reaches - * the identity element again. 
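An illustrative compile-time analogue (not Eigen code) of how this helper unrolls its recursion: here the "group" is Z_6 under addition with generator 2, and the partial specialization on the stop flag terminates the inheritance chain as soon as the identity 0 would be reached again, mirroring the dont_add_current_element mechanism.

#include <iostream>

template<int Current, int Gen, int Mod, bool Stop = ((Current + Gen) % Mod == 0)>
struct powers_of {
  // one more non-identity power, then recurse with current "times" generator
  static const int count = 1 + powers_of<(Current + Gen) % Mod, Gen, Mod>::count;
};

template<int Current, int Gen, int Mod>
struct powers_of<Current, Gen, Mod, true> {
  static const int count = 1;  // last power before the identity reappears
};

int main() {
  // Generator 2 in Z_6 generates {2, 4, 0}: two non-identity powers plus the
  // identity give a subgroup of order 3.
  std::cout << 1 + powers_of<2, 2, 6>::count << "\n";  // prints 3
}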
- * - * \sa enumerate_group_elements, dimino_first_step_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename g, - typename current_element, - typename elements, - bool dont_add_current_element // = false -> -struct dimino_first_step_elements_helper -#ifndef EIGEN_PARSED_BY_DOXYGEN - : // recursive inheritance is too difficult for Doxygen - public dimino_first_step_elements_helper< - Multiply, - Equality, - id, - g, - typename Multiply<current_element, g>::type, - typename concat<elements, type_list<current_element>>::type, - Equality<typename Multiply<current_element, g>::type, id>::value - > {}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename g, - typename current_element, - typename elements -> -struct dimino_first_step_elements_helper<Multiply, Equality, id, g, current_element, elements, true> -#endif // EIGEN_PARSED_BY_DOXYGEN -{ - typedef elements type; - constexpr static int global_flags = Equality<current_element, id>::global_flags; -}; - -/** \internal - * - * \class dimino_first_step_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Add all powers of the first generator to the list of group elements - * - * This template takes the first non-identity generator and generates the initial - * list of elements which consists of all powers of that generator. For a group - * with just one generated, it would be enumerated after this. - * - * \sa enumerate_group_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators -> -struct dimino_first_step_elements -{ - typedef typename get<0, generators>::type first_generator; - typedef typename skip<1, generators>::type next_generators; - typedef type_list<first_generator> generators_done; - - typedef dimino_first_step_elements_helper< - Multiply, - Equality, - id, - first_generator, - first_generator, - type_list<id>, - false - > helper; - typedef typename helper::type type; - constexpr static int global_flags = helper::global_flags; -}; - -/** \internal - * - * \class dimino_get_coset_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Generate all elements of a specific coset - * - * This template generates all the elements of a specific coset by - * multiplying all elements in the given subgroup with the new - * coset representative. Note that the first element of the - * subgroup is always the identity element, so the first element of - * ther result of this template is going to be the coset - * representative itself. - * - * Note that this template accepts an additional boolean parameter - * that specifies whether to actually generate the coset (true) or - * just return an empty list (false). 
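A tiny worked example of the coset construction (illustrative only): in Z_6 under addition, combining each element of the subgroup {0, 3} with the representative 1 "from the right" yields the coset {1, 4}, and because the subgroup starts with the identity, the representative itself comes first.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const int subgroup[] = {0, 3};   // a subgroup of Z_6, identity (0) first
  const int rep = 1;               // the new coset representative
  std::vector<int> coset;
  for (int i = 0; i < 2; ++i)
    coset.push_back((subgroup[i] + rep) % 6);   // apply rep "from the right"
  for (std::size_t i = 0; i < coset.size(); ++i)
    std::cout << coset[i] << " ";               // prints "1 4"
  std::cout << "\n";
}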
- * - * \sa enumerate_group_elements, dimino_add_cosets_for_rep - */ -template< - template<typename, typename> class Multiply, - typename sub_group_elements, - typename new_coset_rep, - bool generate_coset // = true -> -struct dimino_get_coset_elements -{ - typedef typename apply_op_from_right<Multiply, new_coset_rep, sub_group_elements>::type type; -}; - -template< - template<typename, typename> class Multiply, - typename sub_group_elements, - typename new_coset_rep -> -struct dimino_get_coset_elements<Multiply, sub_group_elements, new_coset_rep, false> -{ - typedef type_list<> type; -}; - -/** \internal - * - * \class dimino_add_cosets_for_rep - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template for adding coset spaces - * - * This template multiplies the coset representative with a generator - * from the list of previous generators. If the new element is not in - * the group already, it adds the corresponding coset. Finally it - * proceeds to call itself with the next generator from the list. - * - * \sa enumerate_group_elements, dimino_add_all_coset_spaces - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename g, - typename... gs, - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<g, gs...>, rep_element, sub_group_size> -{ - typedef typename Multiply<rep_element, g>::type new_coset_rep; - typedef contained_in_list_gf<Equality, new_coset_rep, elements> _cil; - constexpr static bool add_coset = !_cil::value; - - typedef typename dimino_get_coset_elements< - Multiply, - sub_group_elements, - new_coset_rep, - add_coset - >::type coset_elements; - - typedef dimino_add_cosets_for_rep< - Multiply, - Equality, - id, - sub_group_elements, - typename concat<elements, coset_elements>::type, - type_list<gs...>, - rep_element, - sub_group_size - > _helper; - - typedef typename _helper::type type; - constexpr static int global_flags = _cil::global_flags | _helper::global_flags; - - /* Note that we don't have to update global flags here, since - * we will only add these elements if they are not part of - * the group already. But that only happens if the coset rep - * is not already in the group, so the check for the coset rep - * will catch this. 
- */ -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), - typename rep_element, - int sub_group_size -> -struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, rep_element, sub_group_size> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_all_coset_spaces - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template for adding all coset spaces for a new generator - * - * This template tries to go through the list of generators (with - * the help of the dimino_add_cosets_for_rep template) as long as - * it still finds elements that are not part of the group and add - * the corresponding cosets. - * - * \sa enumerate_group_elements, dimino_add_cosets_for_rep - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - int sub_group_size, - int rep_pos, - bool stop_condition // = false -> -struct dimino_add_all_coset_spaces -{ - typedef typename get<rep_pos, elements>::type rep_element; - typedef dimino_add_cosets_for_rep< - Multiply, - Equality, - id, - sub_group_elements, - elements, - generators, - rep_element, - sub_group_elements::count - > _ac4r; - typedef typename _ac4r::type new_elements; - - constexpr static int new_rep_pos = rep_pos + sub_group_elements::count; - constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count; - - typedef dimino_add_all_coset_spaces< - Multiply, - Equality, - id, - sub_group_elements, - new_elements, - generators, - sub_group_size, - new_rep_pos, - new_stop_condition - > _helper; - - typedef typename _helper::type type; - constexpr static int global_flags = _helper::global_flags | _ac4r::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename sub_group_elements, - typename elements, - typename generators, - int sub_group_size, - int rep_pos -> -struct dimino_add_all_coset_spaces<Multiply, Equality, id, sub_group_elements, elements, generators, sub_group_size, rep_pos, true> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_generator - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Enlarge the group by adding a new generator. - * - * It accepts a boolean parameter that determines if the generator is redundant, - * i.e. was already seen in the group. In that case, it reduces to a no-op. 
- * - * \sa enumerate_group_elements, dimino_add_all_coset_spaces - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename elements, - typename generators_done, - typename current_generator, - bool redundant // = false -> -struct dimino_add_generator -{ - /* this template is only called if the generator is not redundant - * => all elements of the group multiplied with the new generator - * are going to be new elements of the most trivial coset space - */ - typedef typename apply_op_from_right<Multiply, current_generator, elements>::type multiplied_elements; - typedef typename concat<elements, multiplied_elements>::type new_elements; - - constexpr static int rep_pos = elements::count; - - typedef dimino_add_all_coset_spaces< - Multiply, - Equality, - id, - elements, // elements of previous subgroup - new_elements, - typename concat<generators_done, type_list<current_generator>>::type, - elements::count, // size of previous subgroup - rep_pos, - false // don't stop (because rep_pos >= new_elements::count is always false at this point) - > _helper; - typedef typename _helper::type type; - constexpr static int global_flags = _helper::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename elements, - typename generators_done, - typename current_generator -> -struct dimino_add_generator<Multiply, Equality, id, elements, generators_done, current_generator, true> -{ - // redundant case - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class dimino_add_remaining_generators - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Recursive template that adds all remaining generators to a group - * - * Loop through the list of generators that remain and successively - * add them to the group. 
- * - * \sa enumerate_group_elements, dimino_add_generator - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators_done, - typename remaining_generators, - typename elements -> -struct dimino_add_remaining_generators -{ - typedef typename get<0, remaining_generators>::type first_generator; - typedef typename skip<1, remaining_generators>::type next_generators; - - typedef contained_in_list_gf<Equality, first_generator, elements> _cil; - - typedef dimino_add_generator< - Multiply, - Equality, - id, - elements, - generators_done, - first_generator, - _cil::value - > _helper; - - typedef typename _helper::type new_elements; - - typedef dimino_add_remaining_generators< - Multiply, - Equality, - id, - typename concat<generators_done, type_list<first_generator>>::type, - next_generators, - new_elements - > _next_iter; - - typedef typename _next_iter::type type; - constexpr static int global_flags = - _cil::global_flags | - _helper::global_flags | - _next_iter::global_flags; -}; - -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators_done, - typename elements -> -struct dimino_add_remaining_generators<Multiply, Equality, id, generators_done, type_list<>, elements> -{ - typedef elements type; - constexpr static int global_flags = 0; -}; - -/** \internal - * - * \class enumerate_group_elements_noid - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Helper template that implements group element enumeration - * - * This is a helper template that implements the actual enumeration - * of group elements. This has been split so that the list of - * generators can be cleansed of the identity element before - * performing the actual operation. - * - * \sa enumerate_group_elements - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename generators, - int initial_global_flags = 0 -> -struct enumerate_group_elements_noid -{ - typedef dimino_first_step_elements<Multiply, Equality, id, generators> first_step; - typedef typename first_step::type first_step_elements; - - typedef dimino_add_remaining_generators< - Multiply, - Equality, - id, - typename first_step::generators_done, - typename first_step::next_generators, // remaining_generators - typename first_step::type // first_step elements - > _helper; - - typedef typename _helper::type type; - constexpr static int global_flags = - initial_global_flags | - first_step::global_flags | - _helper::global_flags; -}; - -// in case when no generators are specified -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - int initial_global_flags -> -struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initial_global_flags> -{ - typedef type_list<id> type; - constexpr static int global_flags = initial_global_flags; -}; - -/** \internal - * - * \class enumerate_group_elements - * \ingroup CXX11_TensorSymmetry_Module - * - * \brief Enumerate all elements in a finite group - * - * This template enumerates all elements in a finite group. It accepts - * the following template parameters: - * - * \tparam Multiply The multiplication operation that multiplies two group elements - * with each other. - * \tparam Equality The equality check operation that checks if two group elements - * are equal to another. 
- * \tparam id The identity element - * \tparam _generators A list of (possibly redundant) generators of the group - */ -template< - template<typename, typename> class Multiply, - template<typename, typename> class Equality, - typename id, - typename _generators -> -struct enumerate_group_elements - : public enumerate_group_elements_noid< - Multiply, - Equality, - id, - typename strip_identities<Equality, id, _generators>::type, - strip_identities<Equality, id, _generators>::global_flags - > -{ -}; - -} // end namespace group_theory - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h deleted file mode 100644 index 71d5555..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +++ /dev/null @@ -1,233 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ -#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ - -namespace Eigen { - -// EventCount allows to wait for arbitrary predicates in non-blocking -// algorithms. Think of condition variable, but wait predicate does not need to -// be protected by a mutex. Usage: -// Waiting thread does: -// -// if (predicate) -// return act(); -// EventCount::Waiter& w = waiters[my_index]; -// ec.Prewait(&w); -// if (predicate) { -// ec.CancelWait(&w); -// return act(); -// } -// ec.CommitWait(&w); -// -// Notifying thread does: -// -// predicate = true; -// ec.Notify(true); -// -// Notify is cheap if there are no waiting threads. Prewait/CommitWait are not -// cheap, but they are executed only if the preceeding predicate check has -// failed. -// -// Algorihtm outline: -// There are two main variables: predicate (managed by user) and state_. -// Operation closely resembles Dekker mutual algorithm: -// https://en.wikipedia.org/wiki/Dekker%27s_algorithm -// Waiting thread sets state_ then checks predicate, Notifying thread sets -// predicate then checks state_. Due to seq_cst fences in between these -// operations it is guaranteed than either waiter will see predicate change -// and won't block, or notifying thread will see state_ change and will unblock -// the waiter, or both. But it can't happen that both threads don't see each -// other changes, which would lead to deadlock. -class EventCount { - public: - class Waiter; - - EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) { - eigen_assert(waiters.size() < (1 << kWaiterBits) - 1); - // Initialize epoch to something close to overflow to test overflow. - state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2); - } - - ~EventCount() { - // Ensure there are no waiters. - eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask); - } - - // Prewait prepares for waiting. - // After calling this function the thread must re-check the wait predicate - // and call either CancelWait or CommitWait passing the same Waiter object. 
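A usage sketch of the Prewait/CommitWait/CancelWait/Notify protocol documented at the top of this class, assuming the unsupported CXX11 ThreadPool umbrella header (which provides EventCount and MaxSizeVector); the atomic flag stands in for the user-managed wait predicate.

#include <atomic>
#include <thread>
#include <unsupported/Eigen/CXX11/ThreadPool>

int main() {
  Eigen::MaxSizeVector<Eigen::EventCount::Waiter> waiters(1);
  waiters.resize(1);
  Eigen::EventCount ec(waiters);
  std::atomic<bool> predicate(false);

  std::thread waiter([&]() {
    Eigen::EventCount::Waiter& w = waiters[0];
    if (predicate.load()) return;          // fast path: nothing to wait for
    ec.Prewait(&w);                        // announce the intention to block
    if (predicate.load()) { ec.CancelWait(&w); return; }
    ec.CommitWait(&w);                     // block until Notify
  });

  std::thread notifier([&]() {
    predicate.store(true);                 // publish the state change first...
    ec.Notify(true);                       // ...then wake any waiters
  });

  waiter.join();
  notifier.join();
}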
- void Prewait(Waiter* w) { - w->epoch = state_.fetch_add(kWaiterInc, std::memory_order_relaxed); - std::atomic_thread_fence(std::memory_order_seq_cst); - } - - // CommitWait commits waiting. - void CommitWait(Waiter* w) { - w->state = Waiter::kNotSignaled; - // Modification epoch of this waiter. - uint64_t epoch = - (w->epoch & kEpochMask) + - (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); - uint64_t state = state_.load(std::memory_order_seq_cst); - for (;;) { - if (int64_t((state & kEpochMask) - epoch) < 0) { - // The preceeding waiter has not decided on its fate. Wait until it - // calls either CancelWait or CommitWait, or is notified. - EIGEN_THREAD_YIELD(); - state = state_.load(std::memory_order_seq_cst); - continue; - } - // We've already been notified. - if (int64_t((state & kEpochMask) - epoch) > 0) return; - // Remove this thread from prewait counter and add it to the waiter list. - eigen_assert((state & kWaiterMask) != 0); - uint64_t newstate = state - kWaiterInc + kEpochInc; - newstate = (newstate & ~kStackMask) | (w - &waiters_[0]); - if ((state & kStackMask) == kStackMask) - w->next.store(nullptr, std::memory_order_relaxed); - else - w->next.store(&waiters_[state & kStackMask], std::memory_order_relaxed); - if (state_.compare_exchange_weak(state, newstate, - std::memory_order_release)) - break; - } - Park(w); - } - - // CancelWait cancels effects of the previous Prewait call. - void CancelWait(Waiter* w) { - uint64_t epoch = - (w->epoch & kEpochMask) + - (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); - uint64_t state = state_.load(std::memory_order_relaxed); - for (;;) { - if (int64_t((state & kEpochMask) - epoch) < 0) { - // The preceeding waiter has not decided on its fate. Wait until it - // calls either CancelWait or CommitWait, or is notified. - EIGEN_THREAD_YIELD(); - state = state_.load(std::memory_order_relaxed); - continue; - } - // We've already been notified. - if (int64_t((state & kEpochMask) - epoch) > 0) return; - // Remove this thread from prewait counter. - eigen_assert((state & kWaiterMask) != 0); - if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc, - std::memory_order_relaxed)) - return; - } - } - - // Notify wakes one or all waiting threads. - // Must be called after changing the associated wait predicate. - void Notify(bool all) { - std::atomic_thread_fence(std::memory_order_seq_cst); - uint64_t state = state_.load(std::memory_order_acquire); - for (;;) { - // Easy case: no waiters. - if ((state & kStackMask) == kStackMask && (state & kWaiterMask) == 0) - return; - uint64_t waiters = (state & kWaiterMask) >> kWaiterShift; - uint64_t newstate; - if (all) { - // Reset prewait counter and empty wait list. - newstate = (state & kEpochMask) + (kEpochInc * waiters) + kStackMask; - } else if (waiters) { - // There is a thread in pre-wait state, unblock it. - newstate = state + kEpochInc - kWaiterInc; - } else { - // Pop a waiter from list and unpark it. - Waiter* w = &waiters_[state & kStackMask]; - Waiter* wnext = w->next.load(std::memory_order_relaxed); - uint64_t next = kStackMask; - if (wnext != nullptr) next = wnext - &waiters_[0]; - // Note: we don't add kEpochInc here. ABA problem on the lock-free stack - // can't happen because a waiter is re-pushed onto the stack only after - // it was in the pre-wait state which inevitably leads to epoch - // increment. 
- newstate = (state & kEpochMask) + next; - } - if (state_.compare_exchange_weak(state, newstate, - std::memory_order_acquire)) { - if (!all && waiters) return; // unblocked pre-wait thread - if ((state & kStackMask) == kStackMask) return; - Waiter* w = &waiters_[state & kStackMask]; - if (!all) w->next.store(nullptr, std::memory_order_relaxed); - Unpark(w); - return; - } - } - } - - class Waiter { - friend class EventCount; - // Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector. - EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next; - std::mutex mu; - std::condition_variable cv; - uint64_t epoch; - unsigned state; - enum { - kNotSignaled, - kWaiting, - kSignaled, - }; - }; - - private: - // State_ layout: - // - low kStackBits is a stack of waiters committed wait. - // - next kWaiterBits is count of waiters in prewait state. - // - next kEpochBits is modification counter. - static const uint64_t kStackBits = 16; - static const uint64_t kStackMask = (1ull << kStackBits) - 1; - static const uint64_t kWaiterBits = 16; - static const uint64_t kWaiterShift = 16; - static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1) - << kWaiterShift; - static const uint64_t kWaiterInc = 1ull << kWaiterBits; - static const uint64_t kEpochBits = 32; - static const uint64_t kEpochShift = 32; - static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift; - static const uint64_t kEpochInc = 1ull << kEpochShift; - std::atomic<uint64_t> state_; - MaxSizeVector<Waiter>& waiters_; - - void Park(Waiter* w) { - std::unique_lock<std::mutex> lock(w->mu); - while (w->state != Waiter::kSignaled) { - w->state = Waiter::kWaiting; - w->cv.wait(lock); - } - } - - void Unpark(Waiter* waiters) { - Waiter* next = nullptr; - for (Waiter* w = waiters; w; w = next) { - next = w->next.load(std::memory_order_relaxed); - unsigned state; - { - std::unique_lock<std::mutex> lock(w->mu); - state = w->state; - w->state = Waiter::kSignaled; - } - // Avoid notifying if it wasn't waiting. - if (state == Waiter::kWaiting) w->cv.notify_one(); - } - } - - EventCount(const EventCount&) = delete; - void operator=(const EventCount&) = delete; -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_ diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h deleted file mode 100644 index 354bce5..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ /dev/null @@ -1,274 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
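A worked illustration of the state_ layout just described: a 64-bit word packs the committed-waiter stack head in the low 16 bits, the pre-wait counter in the next 16 bits and the epoch/modification counter in the high 32 bits. The constants mirror the ones above; the sample value is arbitrary.

#include <cstdint>
#include <iostream>

int main() {
  const uint64_t kStackBits   = 16, kStackMask   = (1ull << kStackBits) - 1;
  const uint64_t kWaiterBits  = 16, kWaiterShift = 16;
  const uint64_t kWaiterMask  = ((1ull << kWaiterBits) - 1) << kWaiterShift;
  const uint64_t kEpochShift  = 32;

  // Example state: waiter stack head = 3, two threads in pre-wait, epoch = 7.
  uint64_t state = 3ull | (2ull << kWaiterShift) | (7ull << kEpochShift);

  std::cout << "stack head: " << (state & kStackMask) << "\n";             // 3
  std::cout << "pre-wait:   " << ((state & kWaiterMask) >> kWaiterShift)   // 2
            << "\n";
  std::cout << "epoch:      " << (state >> kEpochShift) << "\n";           // 7
}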
- -#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H -#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H - - -namespace Eigen { - -template <typename Environment> -class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { - public: - typedef typename Environment::Task Task; - typedef RunQueue<Task, 1024> Queue; - - NonBlockingThreadPoolTempl(int num_threads, Environment env = Environment()) - : env_(env), - threads_(num_threads), - queues_(num_threads), - coprimes_(num_threads), - waiters_(num_threads), - blocked_(0), - spinning_(0), - done_(false), - ec_(waiters_) { - waiters_.resize(num_threads); - - // Calculate coprimes of num_threads. - // Coprimes are used for a random walk over all threads in Steal - // and NonEmptyQueueIndex. Iteration is based on the fact that if we take - // a walk starting thread index t and calculate num_threads - 1 subsequent - // indices as (t + coprime) % num_threads, we will cover all threads without - // repetitions (effectively getting a presudo-random permutation of thread - // indices). - for (int i = 1; i <= num_threads; i++) { - unsigned a = i; - unsigned b = num_threads; - // If GCD(a, b) == 1, then a and b are coprimes. - while (b != 0) { - unsigned tmp = a; - a = b; - b = tmp % b; - } - if (a == 1) { - coprimes_.push_back(i); - } - } - for (int i = 0; i < num_threads; i++) { - queues_.push_back(new Queue()); - } - for (int i = 0; i < num_threads; i++) { - threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); })); - } - } - - ~NonBlockingThreadPoolTempl() { - done_ = true; - // Now if all threads block without work, they will start exiting. - // But note that threads can continue to work arbitrary long, - // block, submit new work, unblock and otherwise live full life. - ec_.Notify(true); - - // Join threads explicitly to avoid destruction order issues. - for (size_t i = 0; i < threads_.size(); i++) delete threads_[i]; - for (size_t i = 0; i < threads_.size(); i++) delete queues_[i]; - } - - void Schedule(std::function<void()> fn) { - Task t = env_.CreateTask(std::move(fn)); - PerThread* pt = GetPerThread(); - if (pt->pool == this) { - // Worker thread of this pool, push onto the thread's queue. - Queue* q = queues_[pt->thread_id]; - t = q->PushFront(std::move(t)); - } else { - // A free-standing thread (or worker of another pool), push onto a random - // queue. - Queue* q = queues_[Rand(&pt->rand) % queues_.size()]; - t = q->PushBack(std::move(t)); - } - // Note: below we touch this after making w available to worker threads. - // Strictly speaking, this can lead to a racy-use-after-free. Consider that - // Schedule is called from a thread that is neither main thread nor a worker - // thread of this pool. Then, execution of w directly or indirectly - // completes overall computations, which in turn leads to destruction of - // this. We expect that such scenario is prevented by program, that is, - // this is kept alive while any threads can potentially be in Schedule. - if (!t.f) - ec_.Notify(false); - else - env_.ExecuteTask(t); // Push failed, execute directly. 
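A standalone illustration of the coprime-based random walk described in the constructor comment above (names are illustrative; Steal() and NonEmptyQueueIndex() below use the same traversal): stepping through queue indices by an increment that is coprime to the pool size visits every queue exactly once.

#include <iostream>
#include <vector>

int main() {
  const unsigned num_threads = 6;

  // Collect all i in [1, num_threads] with GCD(i, num_threads) == 1.
  std::vector<unsigned> coprimes;
  for (unsigned i = 1; i <= num_threads; i++) {
    unsigned a = i, b = num_threads;
    while (b != 0) { unsigned tmp = a; a = b; b = tmp % b; }
    if (a == 1) coprimes.push_back(i);   // for 6: {1, 5}
  }

  // Walk all queues starting from victim 2 with increment 5 (a coprime):
  // visits 2 1 0 5 4 3, i.e. every index exactly once, in scrambled order.
  unsigned inc = coprimes.back();
  unsigned victim = 2;
  for (unsigned i = 0; i < num_threads; i++) {
    std::cout << victim << " ";
    victim += inc;
    if (victim >= num_threads) victim -= num_threads;
  }
  std::cout << "\n";
}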
- } - - int NumThreads() const final { - return static_cast<int>(threads_.size()); - } - - int CurrentThreadId() const final { - const PerThread* pt = - const_cast<NonBlockingThreadPoolTempl*>(this)->GetPerThread(); - if (pt->pool == this) { - return pt->thread_id; - } else { - return -1; - } - } - - private: - typedef typename Environment::EnvThread Thread; - - struct PerThread { - constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { } - NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. - uint64_t rand; // Random generator state. - int thread_id; // Worker thread index in pool. - }; - - Environment env_; - MaxSizeVector<Thread*> threads_; - MaxSizeVector<Queue*> queues_; - MaxSizeVector<unsigned> coprimes_; - MaxSizeVector<EventCount::Waiter> waiters_; - std::atomic<unsigned> blocked_; - std::atomic<bool> spinning_; - std::atomic<bool> done_; - EventCount ec_; - - // Main worker thread loop. - void WorkerLoop(int thread_id) { - PerThread* pt = GetPerThread(); - pt->pool = this; - pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id()); - pt->thread_id = thread_id; - Queue* q = queues_[thread_id]; - EventCount::Waiter* waiter = &waiters_[thread_id]; - for (;;) { - Task t = q->PopFront(); - if (!t.f) { - t = Steal(); - if (!t.f) { - // Leave one thread spinning. This reduces latency. - // TODO(dvyukov): 1000 iterations is based on fair dice roll, tune it. - // Also, the time it takes to attempt to steal work 1000 times depends - // on the size of the thread pool. However the speed at which the user - // of the thread pool submit tasks is independent of the size of the - // pool. Consider a time based limit instead. - if (!spinning_ && !spinning_.exchange(true)) { - for (int i = 0; i < 1000 && !t.f; i++) { - t = Steal(); - } - spinning_ = false; - } - if (!t.f) { - if (!WaitForWork(waiter, &t)) { - return; - } - } - } - } - if (t.f) { - env_.ExecuteTask(t); - } - } - } - - // Steal tries to steal work from other worker threads in best-effort manner. - Task Steal() { - PerThread* pt = GetPerThread(); - const size_t size = queues_.size(); - unsigned r = Rand(&pt->rand); - unsigned inc = coprimes_[r % coprimes_.size()]; - unsigned victim = r % size; - for (unsigned i = 0; i < size; i++) { - Task t = queues_[victim]->PopBack(); - if (t.f) { - return t; - } - victim += inc; - if (victim >= size) { - victim -= size; - } - } - return Task(); - } - - // WaitForWork blocks until new work is available (returns true), or if it is - // time to exit (returns false). Can optionally return a task to execute in t - // (in such case t.f != nullptr on return). - bool WaitForWork(EventCount::Waiter* waiter, Task* t) { - eigen_assert(!t->f); - // We already did best-effort emptiness check in Steal, so prepare for - // blocking. - ec_.Prewait(waiter); - // Now do a reliable emptiness check. - int victim = NonEmptyQueueIndex(); - if (victim != -1) { - ec_.CancelWait(waiter); - *t = queues_[victim]->PopBack(); - return true; - } - // Number of blocked threads is used as termination condition. - // If we are shutting down and all worker threads blocked without work, - // that's we are done. - blocked_++; - if (done_ && blocked_ == threads_.size()) { - ec_.CancelWait(waiter); - // Almost done, but need to re-check queues. - // Consider that all queues are empty and all worker threads are preempted - // right after incrementing blocked_ above. Now a free-standing thread - // submits work and calls destructor (which sets done_). 
If we don't - // re-check queues, we will exit leaving the work unexecuted. - if (NonEmptyQueueIndex() != -1) { - // Note: we must not pop from queues before we decrement blocked_, - // otherwise the following scenario is possible. Consider that instead - // of checking for emptiness we popped the only element from queues. - // Now other worker threads can start exiting, which is bad if the - // work item submits other work. So we just check emptiness here, - // which ensures that all worker threads exit at the same time. - blocked_--; - return true; - } - // Reached stable termination state. - ec_.Notify(true); - return false; - } - ec_.CommitWait(waiter); - blocked_--; - return true; - } - - int NonEmptyQueueIndex() { - PerThread* pt = GetPerThread(); - const size_t size = queues_.size(); - unsigned r = Rand(&pt->rand); - unsigned inc = coprimes_[r % coprimes_.size()]; - unsigned victim = r % size; - for (unsigned i = 0; i < size; i++) { - if (!queues_[victim]->Empty()) { - return victim; - } - victim += inc; - if (victim >= size) { - victim -= size; - } - } - return -1; - } - - static EIGEN_STRONG_INLINE PerThread* GetPerThread() { - EIGEN_THREAD_LOCAL PerThread per_thread_; - PerThread* pt = &per_thread_; - return pt; - } - - static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { - uint64_t current = *state; - // Update the internal state - *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; - // Generate the random output (using the PCG-XSH-RS scheme) - return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); - } -}; - -typedef NonBlockingThreadPoolTempl<StlThreadEnvironment> NonBlockingThreadPool; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h deleted file mode 100644 index 05ed76c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +++ /dev/null @@ -1,210 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ -#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ - - -namespace Eigen { - -// RunQueue is a fixed-size, partially non-blocking deque or Work items. -// Operations on front of the queue must be done by a single thread (owner), -// operations on back of the queue can be done by multiple threads concurrently. -// -// Algorithm outline: -// All remote threads operating on the queue back are serialized by a mutex. -// This ensures that at most two threads access state: owner and one remote -// thread (Size aside). The algorithm ensures that the occupied region of the -// underlying array is logically continuous (can wraparound, but no stray -// occupied elements). Owner operates on one end of this region, remote thread -// operates on the other end. Synchronization between these threads -// (potential consumption of the last element and take up of the last empty -// element) happens by means of state variable in each element. States are: -// empty, busy (in process of insertion of removal) and ready. Threads claim -// elements (empty->busy and ready->busy transitions) by means of a CAS -// operation. 
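A minimal sketch (not Eigen code) of the per-element claim protocol from this algorithm outline: a slot is claimed by CAS-ing its state to busy, and handed back with a plain store once the owning thread is done, as the remainder of the comment explains.

#include <atomic>
#include <cstdint>
#include <iostream>

enum { kEmpty, kBusy, kReady };

int main() {
  std::atomic<uint8_t> state(kEmpty);

  // Producer claims an empty slot, fills it, and publishes it as ready.
  uint8_t expected = kEmpty;
  if (state.compare_exchange_strong(expected, kBusy,
                                    std::memory_order_acquire)) {
    // ... move the work item into the slot here ...
    state.store(kReady, std::memory_order_release);  // finishing plain store
  }

  // A consumer claims a ready slot the same way; a failed CAS simply means
  // another thread got there first (or the slot is not ready yet).
  expected = kReady;
  if (state.compare_exchange_strong(expected, kBusy,
                                    std::memory_order_acquire)) {
    // ... move the work item out of the slot here ...
    state.store(kEmpty, std::memory_order_release);
  }

  std::cout << int(state.load()) << "\n";  // prints 0 (kEmpty) again
}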
The finishing transition (busy->empty and busy->ready) are done -// with plain store as the element is exclusively owned by the current thread. -// -// Note: we could permit only pointers as elements, then we would not need -// separate state variable as null/non-null pointer value would serve as state, -// but that would require malloc/free per operation for large, complex values -// (and this is designed to store std::function<()>). -template <typename Work, unsigned kSize> -class RunQueue { - public: - RunQueue() : front_(0), back_(0) { - // require power-of-two for fast masking - eigen_assert((kSize & (kSize - 1)) == 0); - eigen_assert(kSize > 2); // why would you do this? - eigen_assert(kSize <= (64 << 10)); // leave enough space for counter - for (unsigned i = 0; i < kSize; i++) - array_[i].state.store(kEmpty, std::memory_order_relaxed); - } - - ~RunQueue() { eigen_assert(Size() == 0); } - - // PushFront inserts w at the beginning of the queue. - // If queue is full returns w, otherwise returns default-constructed Work. - Work PushFront(Work w) { - unsigned front = front_.load(std::memory_order_relaxed); - Elem* e = &array_[front & kMask]; - uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kEmpty || - !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) - return w; - front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed); - e->w = std::move(w); - e->state.store(kReady, std::memory_order_release); - return Work(); - } - - // PopFront removes and returns the first element in the queue. - // If the queue was empty returns default-constructed Work. - Work PopFront() { - unsigned front = front_.load(std::memory_order_relaxed); - Elem* e = &array_[(front - 1) & kMask]; - uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) - return Work(); - Work w = std::move(e->w); - e->state.store(kEmpty, std::memory_order_release); - front = ((front - 1) & kMask2) | (front & ~kMask2); - front_.store(front, std::memory_order_relaxed); - return w; - } - - // PushBack adds w at the end of the queue. - // If queue is full returns w, otherwise returns default-constructed Work. - Work PushBack(Work w) { - std::unique_lock<std::mutex> lock(mutex_); - unsigned back = back_.load(std::memory_order_relaxed); - Elem* e = &array_[(back - 1) & kMask]; - uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kEmpty || - !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) - return w; - back = ((back - 1) & kMask2) | (back & ~kMask2); - back_.store(back, std::memory_order_relaxed); - e->w = std::move(w); - e->state.store(kReady, std::memory_order_release); - return Work(); - } - - // PopBack removes and returns the last elements in the queue. - // Can fail spuriously. - Work PopBack() { - if (Empty()) return Work(); - std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock); - if (!lock) return Work(); - unsigned back = back_.load(std::memory_order_relaxed); - Elem* e = &array_[back & kMask]; - uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) - return Work(); - Work w = std::move(e->w); - e->state.store(kEmpty, std::memory_order_release); - back_.store(back + 1 + (kSize << 1), std::memory_order_relaxed); - return w; - } - - // PopBackHalf removes and returns half last elements in the queue. - // Returns number of elements removed. 
But can also fail spuriously. - unsigned PopBackHalf(std::vector<Work>* result) { - if (Empty()) return 0; - std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock); - if (!lock) return 0; - unsigned back = back_.load(std::memory_order_relaxed); - unsigned size = Size(); - unsigned mid = back; - if (size > 1) mid = back + (size - 1) / 2; - unsigned n = 0; - unsigned start = 0; - for (; static_cast<int>(mid - back) >= 0; mid--) { - Elem* e = &array_[mid & kMask]; - uint8_t s = e->state.load(std::memory_order_relaxed); - if (n == 0) { - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) - continue; - start = mid; - } else { - // Note: no need to store temporal kBusy, we exclusively own these - // elements. - eigen_assert(s == kReady); - } - result->push_back(std::move(e->w)); - e->state.store(kEmpty, std::memory_order_release); - n++; - } - if (n != 0) - back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed); - return n; - } - - // Size returns current queue size. - // Can be called by any thread at any time. - unsigned Size() const { - // Emptiness plays critical role in thread pool blocking. So we go to great - // effort to not produce false positives (claim non-empty queue as empty). - for (;;) { - // Capture a consistent snapshot of front/tail. - unsigned front = front_.load(std::memory_order_acquire); - unsigned back = back_.load(std::memory_order_acquire); - unsigned front1 = front_.load(std::memory_order_relaxed); - if (front != front1) continue; - int size = (front & kMask2) - (back & kMask2); - // Fix overflow. - if (size < 0) size += 2 * kSize; - // Order of modification in push/pop is crafted to make the queue look - // larger than it is during concurrent modifications. E.g. pop can - // decrement size before the corresponding push has incremented it. - // So the computed size can be up to kSize + 1, fix it. - if (size > static_cast<int>(kSize)) size = kSize; - return size; - } - } - - // Empty tests whether container is empty. - // Can be called by any thread at any time. - bool Empty() const { return Size() == 0; } - - private: - static const unsigned kMask = kSize - 1; - static const unsigned kMask2 = (kSize << 1) - 1; - struct Elem { - std::atomic<uint8_t> state; - Work w; - }; - enum { - kEmpty, - kBusy, - kReady, - }; - std::mutex mutex_; - // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of - // front/back, repsectively. The remaining bits contain modification counters - // that are incremented on Push operations. This allows us to (1) distinguish - // between empty and full conditions (if we would use log(kSize) bits for - // position, these conditions would be indistinguishable); (2) obtain - // consistent snapshot of front_/back_ for Size operation using the - // modification counters. - std::atomic<unsigned> front_; - std::atomic<unsigned> back_; - Elem array_[kSize]; - - RunQueue(const RunQueue&) = delete; - void operator=(const RunQueue&) = delete; -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h deleted file mode 100644 index e75d0f4..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h +++ /dev/null @@ -1,154 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
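A single-threaded usage sketch for the RunQueue defined above, assuming the unsupported CXX11 ThreadPool umbrella header; Work is a hypothetical stand-in for the pool's Task type. The owner thread works on the front of the deque while other threads steal from the back, exactly as WorkerLoop and Steal do in the pool above.

#include <iostream>
#include <unsupported/Eigen/CXX11/ThreadPool>

struct Work {            // hypothetical stand-in for the pool's Task type
  int payload;
  Work() : payload(0) {}
  explicit Work(int p) : payload(p) {}
};

int main() {
  Eigen::RunQueue<Work, 64> q;   // kSize must be a power of two

  q.PushFront(Work(42));         // owner side; returns the item back if full
  Work stolen = q.PopBack();     // "remote" side; may fail spuriously
  Work own = q.PopFront();       // owner side; an empty queue yields Work()

  std::cout << q.Size() << "\n";                         // prints 0
  std::cout << (stolen.payload + own.payload) << "\n";   // prints 42
}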
-// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H -#define EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H - -namespace Eigen { - -// The implementation of the ThreadPool type ensures that the Schedule method -// runs the functions it is provided in FIFO order when the scheduling is done -// by a single thread. -// Environment provides a way to create threads and also allows to intercept -// task submission and execution. -template <typename Environment> -class SimpleThreadPoolTempl : public ThreadPoolInterface { - public: - // Construct a pool that contains "num_threads" threads. - explicit SimpleThreadPoolTempl(int num_threads, Environment env = Environment()) - : env_(env), threads_(num_threads), waiters_(num_threads) { - for (int i = 0; i < num_threads; i++) { - threads_.push_back(env.CreateThread([this, i]() { WorkerLoop(i); })); - } - } - - // Wait until all scheduled work has finished and then destroy the - // set of threads. - ~SimpleThreadPoolTempl() { - { - // Wait for all work to get done. - std::unique_lock<std::mutex> l(mu_); - while (!pending_.empty()) { - empty_.wait(l); - } - exiting_ = true; - - // Wakeup all waiters. - for (auto w : waiters_) { - w->ready = true; - w->task.f = nullptr; - w->cv.notify_one(); - } - } - - // Wait for threads to finish. - for (auto t : threads_) { - delete t; - } - } - - // Schedule fn() for execution in the pool of threads. The functions are - // executed in the order in which they are scheduled. - void Schedule(std::function<void()> fn) final { - Task t = env_.CreateTask(std::move(fn)); - std::unique_lock<std::mutex> l(mu_); - if (waiters_.empty()) { - pending_.push_back(std::move(t)); - } else { - Waiter* w = waiters_.back(); - waiters_.pop_back(); - w->ready = true; - w->task = std::move(t); - w->cv.notify_one(); - } - } - - int NumThreads() const final { - return static_cast<int>(threads_.size()); - } - - int CurrentThreadId() const final { - const PerThread* pt = this->GetPerThread(); - if (pt->pool == this) { - return pt->thread_id; - } else { - return -1; - } - } - - protected: - void WorkerLoop(int thread_id) { - std::unique_lock<std::mutex> l(mu_); - PerThread* pt = GetPerThread(); - pt->pool = this; - pt->thread_id = thread_id; - Waiter w; - Task t; - while (!exiting_) { - if (pending_.empty()) { - // Wait for work to be assigned to me - w.ready = false; - waiters_.push_back(&w); - while (!w.ready) { - w.cv.wait(l); - } - t = w.task; - w.task.f = nullptr; - } else { - // Pick up pending work - t = std::move(pending_.front()); - pending_.pop_front(); - if (pending_.empty()) { - empty_.notify_all(); - } - } - if (t.f) { - mu_.unlock(); - env_.ExecuteTask(t); - t.f = nullptr; - mu_.lock(); - } - } - } - - private: - typedef typename Environment::Task Task; - typedef typename Environment::EnvThread Thread; - - struct Waiter { - std::condition_variable cv; - Task task; - bool ready; - }; - - struct PerThread { - constexpr PerThread() : pool(NULL), thread_id(-1) { } - SimpleThreadPoolTempl* pool; // Parent pool, or null for normal threads. - int thread_id; // Worker thread index in pool. - }; - - Environment env_; - std::mutex mu_; - MaxSizeVector<Thread*> threads_; // All threads - MaxSizeVector<Waiter*> waiters_; // Stack of waiting threads. 
- std::deque<Task> pending_; // Queue of pending work - std::condition_variable empty_; // Signaled on pending_.empty() - bool exiting_ = false; - - PerThread* GetPerThread() const { - EIGEN_THREAD_LOCAL PerThread per_thread; - return &per_thread; - } -}; - -typedef SimpleThreadPoolTempl<StlThreadEnvironment> SimpleThreadPool; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h deleted file mode 100644 index 399f95c..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +++ /dev/null @@ -1,38 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H -#define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H - -namespace Eigen { - -struct StlThreadEnvironment { - struct Task { - std::function<void()> f; - }; - - // EnvThread constructor must start the thread, - // destructor must join the thread. - class EnvThread { - public: - EnvThread(std::function<void()> f) : thr_(std::move(f)) {} - ~EnvThread() { thr_.join(); } - - private: - std::thread thr_; - }; - - EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); } - Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; } - void ExecuteTask(const Task& t) { t.f(); } -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h deleted file mode 100644 index cfa2217..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +++ /dev/null @@ -1,22 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H -#define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H - -// Try to come up with a portable implementation of thread local variables -#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) -#define EIGEN_THREAD_LOCAL static __thread -#elif EIGEN_COMP_CLANG -#define EIGEN_THREAD_LOCAL static __thread -#else -#define EIGEN_THREAD_LOCAL static thread_local -#endif - -#endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h deleted file mode 100644 index a65ee97..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h +++ /dev/null @@ -1,33 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
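A small usage sketch for the pools defined in these headers, assuming the unsupported CXX11 ThreadPool umbrella header of the same vintage: SimpleThreadPoolTempl (and likewise NonBlockingThreadPoolTempl) is parameterized on an Environment such as StlThreadEnvironment and runs scheduled closures on its worker threads; the destructor waits for scheduled work before joining.

#include <atomic>
#include <iostream>
#include <unsupported/Eigen/CXX11/ThreadPool>

int main() {
  std::atomic<int> done(0);
  {
    Eigen::SimpleThreadPool pool(4);   // 4 worker threads
    for (int i = 0; i < 100; ++i)
      pool.Schedule([&done]() { done.fetch_add(1); });
    // ~SimpleThreadPoolTempl waits for all scheduled work, then joins workers.
  }
  std::cout << done.load() << "\n";    // prints 100
}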
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H -#define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H - -namespace Eigen { - -// This defines an interface that ThreadPoolDevice can take to use -// custom thread pools underneath. -class ThreadPoolInterface { - public: - virtual void Schedule(std::function<void()> fn) = 0; - - // Returns the number of threads in the pool. - virtual int NumThreads() const = 0; - - // Returns a logical thread index between 0 and NumThreads() - 1 if called - // from one of the threads in the pool. Returns -1 otherwise. - virtual int CurrentThreadId() const = 0; - - virtual ~ThreadPoolInterface() {} -}; - -} // namespace Eigen - -#endif // EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h deleted file mode 100644 index a859c7b..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h +++ /dev/null @@ -1,20 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H -#define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H - -// Try to come up with a portable way to yield -#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) -#define EIGEN_THREAD_YIELD() sched_yield() -#else -#define EIGEN_THREAD_YIELD() std::this_thread::yield() -#endif - -#endif // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H diff --git a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h deleted file mode 100644 index ec27edd..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h +++ /dev/null @@ -1,542 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11META_H -#define EIGEN_CXX11META_H - -#include <vector> -#include "EmulateArray.h" - -// Emulate the cxx11 functionality that we need if the compiler doesn't support it. -// Visual studio 2015 doesn't advertise itself as cxx11 compliant, although it -// supports enough of the standard for our needs -#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 - -#include "CXX11Workarounds.h" - -namespace Eigen { - -namespace internal { - -/** \internal - * \file CXX11/util/CXX11Meta.h - * This file contains generic metaprogramming classes which are not specifically related to Eigen. - * This file expands upon Core/util/Meta.h and adds support for C++11 specific features. - */ - -template<typename... tt> -struct type_list { constexpr static int count = sizeof...(tt); }; - -template<typename t, typename... tt> -struct type_list<t, tt...> { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; }; - -template<typename T, T... 
nn> -struct numeric_list { constexpr static std::size_t count = sizeof...(nn); }; - -template<typename T, T n, T... nn> -struct numeric_list<T, n, nn...> { constexpr static std::size_t count = sizeof...(nn) + 1; constexpr static T first_value = n; }; - -/* numeric list constructors - * - * equivalencies: - * constructor result - * typename gen_numeric_list<int, 5>::type numeric_list<int, 0,1,2,3,4> - * typename gen_numeric_list_reversed<int, 5>::type numeric_list<int, 4,3,2,1,0> - * typename gen_numeric_list_swapped_pair<int, 5,1,2>::type numeric_list<int, 0,2,1,3,4> - * typename gen_numeric_list_repeated<int, 0, 5>::type numeric_list<int, 0,0,0,0,0> - */ - -template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list : gen_numeric_list<T, n-1, start, start + n-1, ii...> {}; -template<typename T, T start, T... ii> struct gen_numeric_list<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list_reversed : gen_numeric_list_reversed<T, n-1, start, ii..., start + n-1> {}; -template<typename T, T start, T... ii> struct gen_numeric_list_reversed<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T a, T b, T start = 0, T... ii> struct gen_numeric_list_swapped_pair : gen_numeric_list_swapped_pair<T, n-1, a, b, start, (start + n-1) == a ? b : ((start + n-1) == b ? a : (start + n-1)), ii...> {}; -template<typename T, T a, T b, T start, T... ii> struct gen_numeric_list_swapped_pair<T, 0, a, b, start, ii...> { typedef numeric_list<T, ii...> type; }; - -template<typename T, std::size_t n, T V, T... nn> struct gen_numeric_list_repeated : gen_numeric_list_repeated<T, n-1, V, V, nn...> {}; -template<typename T, T V, T... nn> struct gen_numeric_list_repeated<T, 0, V, nn...> { typedef numeric_list<T, nn...> type; }; - -/* list manipulation: concatenate */ - -template<class a, class b> struct concat; - -template<typename... as, typename... bs> struct concat<type_list<as...>, type_list<bs...>> { typedef type_list<as..., bs...> type; }; -template<typename T, T... as, T... bs> struct concat<numeric_list<T, as...>, numeric_list<T, bs...> > { typedef numeric_list<T, as..., bs...> type; }; - -template<typename... p> struct mconcat; -template<typename a> struct mconcat<a> { typedef a type; }; -template<typename a, typename b> struct mconcat<a, b> : concat<a, b> {}; -template<typename a, typename b, typename... cs> struct mconcat<a, b, cs...> : concat<a, typename mconcat<b, cs...>::type> {}; - -/* list manipulation: extract slices */ - -template<int n, typename x> struct take; -template<int n, typename a, typename... as> struct take<n, type_list<a, as...>> : concat<type_list<a>, typename take<n-1, type_list<as...>>::type> {}; -template<int n> struct take<n, type_list<>> { typedef type_list<> type; }; -template<typename a, typename... as> struct take<0, type_list<a, as...>> { typedef type_list<> type; }; -template<> struct take<0, type_list<>> { typedef type_list<> type; }; - -template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {}; -template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; }; -template<typename T, T a, T... 
as> struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; }; -template<typename T> struct take<0, numeric_list<T>> { typedef numeric_list<T> type; }; - -template<typename T, int n, T... ii> struct h_skip_helper_numeric; -template<typename T, int n, T i, T... ii> struct h_skip_helper_numeric<T, n, i, ii...> : h_skip_helper_numeric<T, n-1, ii...> {}; -template<typename T, T i, T... ii> struct h_skip_helper_numeric<T, 0, i, ii...> { typedef numeric_list<T, i, ii...> type; }; -template<typename T, int n> struct h_skip_helper_numeric<T, n> { typedef numeric_list<T> type; }; -template<typename T> struct h_skip_helper_numeric<T, 0> { typedef numeric_list<T> type; }; - -template<int n, typename... tt> struct h_skip_helper_type; -template<int n, typename t, typename... tt> struct h_skip_helper_type<n, t, tt...> : h_skip_helper_type<n-1, tt...> {}; -template<typename t, typename... tt> struct h_skip_helper_type<0, t, tt...> { typedef type_list<t, tt...> type; }; -template<int n> struct h_skip_helper_type<n> { typedef type_list<> type; }; -template<> struct h_skip_helper_type<0> { typedef type_list<> type; }; - -template<int n> -struct h_skip { - template<typename T, T... ii> - constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); } - template<typename... tt> - constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); } -}; - -template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; }; - -template<int start, int count, typename a> struct slice : take<count, typename skip<start, a>::type> {}; - -/* list manipulation: retrieve single element from list */ - -template<int n, typename x> struct get; - -template<int n, typename a, typename... as> struct get<n, type_list<a, as...>> : get<n-1, type_list<as...>> {}; -template<typename a, typename... as> struct get<0, type_list<a, as...>> { typedef a type; }; - -template<typename T, int n, T a, T... as> struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {}; -template<typename T, T a, T... as> struct get<0, numeric_list<T, a, as...>> { constexpr static T value = a; }; - -/* always get type, regardless of dummy; good for parameter pack expansion */ - -template<typename T, T dummy, typename t> struct id_numeric { typedef t type; }; -template<typename dummy, typename t> struct id_type { typedef t type; }; - -/* equality checking, flagged version */ - -template<typename a, typename b> struct is_same_gf : is_same<a, b> { constexpr static int global_flags = 0; }; - -/* apply_op to list */ - -template< - bool from_left, // false - template<typename, typename> class op, - typename additional_param, - typename... values -> -struct h_apply_op_helper { typedef type_list<typename op<values, additional_param>::type...> type; }; -template< - template<typename, typename> class op, - typename additional_param, - typename... values -> -struct h_apply_op_helper<true, op, additional_param, values...> { typedef type_list<typename op<additional_param, values>::type...> type; }; - -template< - bool from_left, - template<typename, typename> class op, - typename additional_param -> -struct h_apply_op -{ - template<typename... 
values> - constexpr static typename h_apply_op_helper<from_left, op, additional_param, values...>::type helper(type_list<values...>) - { return typename h_apply_op_helper<from_left, op, additional_param, values...>::type(); } -}; - -template< - template<typename, typename> class op, - typename additional_param, - typename a -> -struct apply_op_from_left { typedef decltype(h_apply_op<true, op, additional_param>::helper(a())) type; }; - -template< - template<typename, typename> class op, - typename additional_param, - typename a -> -struct apply_op_from_right { typedef decltype(h_apply_op<false, op, additional_param>::helper(a())) type; }; - -/* see if an element is in a list */ - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - bool last_check_positive = false -> -struct contained_in_list; - -template< - template<typename, typename> class test, - typename check_against, - typename h_list -> -struct contained_in_list<test, check_against, h_list, true> -{ - constexpr static bool value = true; -}; - -template< - template<typename, typename> class test, - typename check_against, - typename a, - typename... as -> -struct contained_in_list<test, check_against, type_list<a, as...>, false> : contained_in_list<test, check_against, type_list<as...>, test<check_against, a>::value> {}; - -template< - template<typename, typename> class test, - typename check_against - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty) -> -struct contained_in_list<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, false> { constexpr static bool value = false; }; - -/* see if an element is in a list and check for global flags */ - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - int default_flags = 0, - bool last_check_positive = false, - int last_check_flags = default_flags -> -struct contained_in_list_gf; - -template< - template<typename, typename> class test, - typename check_against, - typename h_list, - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, h_list, default_flags, true, last_check_flags> -{ - constexpr static bool value = true; - constexpr static int global_flags = last_check_flags; -}; - -template< - template<typename, typename> class test, - typename check_against, - typename a, - typename... as, - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, type_list<a, as...>, default_flags, false, last_check_flags> : contained_in_list_gf<test, check_against, type_list<as...>, default_flags, test<check_against, a>::value, test<check_against, a>::global_flags> {}; - -template< - template<typename, typename> class test, - typename check_against - EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), - int default_flags, - int last_check_flags -> -struct contained_in_list_gf<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; }; - -/* generic reductions */ - -template< - typename Reducer, - typename... Ts -> struct reduce; - -template< - typename Reducer -> struct reduce<Reducer> -{ - constexpr static inline int run() { return Reducer::Identity; } -}; - -template< - typename Reducer, - typename A -> struct reduce<Reducer, A> -{ - constexpr static inline A run(A a) { return a; } -}; - -template< - typename Reducer, - typename A, - typename... 
Ts -> struct reduce<Reducer, A, Ts...> -{ - constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) { - return Reducer::run(a, reduce<Reducer, Ts...>::run(ts...)); - } -}; - -/* generic binary operations */ - -struct sum_op { - template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; } - static constexpr int Identity = 0; -}; -struct product_op { - template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; } - static constexpr int Identity = 1; -}; - -struct logical_and_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } }; -struct logical_or_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } }; - -struct equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } }; -struct not_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } }; -struct lesser_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } }; -struct lesser_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } }; -struct greater_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } }; -struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } }; - -/* generic unary operations */ - -struct not_op { template<typename A> constexpr static inline auto run(A a) -> decltype(!a) { return !a; } }; -struct negation_op { template<typename A> constexpr static inline auto run(A a) -> decltype(-a) { return -a; } }; -struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } }; - - -/* reductions for lists */ - -// using auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it -// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1 -// does... -template<typename... Ts> -constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts) -{ - return reduce<product_op, Ts...>::run(ts...); -} - -template<typename... Ts> -constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts) -{ - return reduce<sum_op, Ts...>::run(ts...); -} - -/* reverse arrays */ - -template<typename Array, int... 
n> -constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>) -{ - return {{array_get<sizeof...(n) - n - 1>(arr)...}}; -} - -template<typename T, std::size_t N> -constexpr inline array<T, N> array_reverse(array<T, N> arr) -{ - return h_array_reverse(arr, typename gen_numeric_list<int, N>::type()); -} - - -/* generic array reductions */ - -// can't reuse standard reduce() interface above because Intel's Compiler -// *really* doesn't like it, so we just reimplement the stuff -// (start from N - 1 and work down to 0 because specialization for -// n == N - 1 also doesn't work in Intel's compiler, so it goes into -// an infinite loop) -template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1> -struct h_array_reduce { - EIGEN_DEVICE_FUNC constexpr static inline auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr))) - { - return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)); - } -}; - -template<typename Reducer, typename T, std::size_t N> -struct h_array_reduce<Reducer, T, N, 0> -{ - EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, N>& arr, T) - { - return array_get<0>(arr); - } -}; - -template<typename Reducer, typename T> -struct h_array_reduce<Reducer, T, 0> -{ - EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, 0>&, T identity) - { - return identity; - } -}; - -template<typename Reducer, typename T, std::size_t N> -EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity)) -{ - return h_array_reduce<Reducer, T, N>::run(arr, identity); -} - -/* standard array reductions */ - -template<typename T, std::size_t N> -EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0))) -{ - return array_reduce<sum_op, T, N>(arr, static_cast<T>(0)); -} - -template<typename T, std::size_t N> -EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1))) -{ - return array_reduce<product_op, T, N>(arr, static_cast<T>(1)); -} - -template<typename t> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) { - eigen_assert(a.size() > 0); - t prod = 1; - for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } - return prod; -} - -/* zip an array */ - -template<typename Op, typename A, typename B, std::size_t N, int... n> -constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -{ - return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }}; -} - -template<typename Op, typename A, typename B, std::size_t N> -constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b) -{ - return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type()); -} - -/* zip an array and reduce the result */ - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... 
n> -constexpr inline auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...)) -{ - return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...); -} - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N> -constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type())) -{ - return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()); -} - -/* apply stuff to an array */ - -template<typename Op, typename A, std::size_t N, int... n> -constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>) -{ - return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }}; -} - -template<typename Op, typename A, std::size_t N> -constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a) -{ - return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type()); -} - -/* apply stuff to an array and reduce */ - -template<typename Reducer, typename Op, typename A, std::size_t N, int... n> -constexpr inline auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...)) -{ - return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...); -} - -template<typename Reducer, typename Op, typename A, std::size_t N> -constexpr inline auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type())) -{ - return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()); -} - -/* repeat a value n times (and make an array out of it - * usage: - * array<int, 16> = repeat<16>(42); - */ - -template<int n> -struct h_repeat -{ - template<typename t, int... ii> - constexpr static inline array<t, n> run(t v, numeric_list<int, ii...>) - { - return {{ typename id_numeric<int, ii, t>::type(v)... }}; - } -}; - -template<int n, typename t> -constexpr array<t, n> repeat(t v) { return h_repeat<n>::run(v, typename gen_numeric_list<int, n>::type()); } - -/* instantiate a class by a C-style array */ -template<class InstType, typename ArrType, std::size_t N, bool Reverse, typename... Ps> -struct h_instantiate_by_c_array; - -template<class InstType, typename ArrType, std::size_t N, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, N, false, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, Ps..., ArrType>::run(arr + 1, args..., arr[0]); - } -}; - -template<class InstType, typename ArrType, std::size_t N, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, N, true, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, ArrType, Ps...>::run(arr + 1, arr[0], args...); - } -}; - -template<class InstType, typename ArrType, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, 0, false, Ps...> -{ - static InstType run(ArrType* arr, Ps... 
args) - { - (void)arr; - return InstType(args...); - } -}; - -template<class InstType, typename ArrType, typename... Ps> -struct h_instantiate_by_c_array<InstType, ArrType, 0, true, Ps...> -{ - static InstType run(ArrType* arr, Ps... args) - { - (void)arr; - return InstType(args...); - } -}; - -template<class InstType, typename ArrType, std::size_t N, bool Reverse = false> -InstType instantiate_by_c_array(ArrType* arr) -{ - return h_instantiate_by_c_array<InstType, ArrType, N, Reverse>::run(arr); -} - -} // end namespace internal - -} // end namespace Eigen - -#else // Non C++11, fallback to emulation mode - -#include "EmulateCXX11Meta.h" - -#endif - -#endif // EIGEN_CXX11META_H diff --git a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h b/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h deleted file mode 100644 index fe4d228..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h +++ /dev/null @@ -1,88 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11WORKAROUNDS_H -#define EIGEN_CXX11WORKAROUNDS_H - -/* COMPATIBILITY CHECKS - * (so users of compilers that are too old get some realistic error messages) - */ -#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310) -#error Intel Compiler only supports required C++ features since version 13.1. -// note that most stuff in principle works with 13.0 but when combining -// some features, at some point 13.0 will just fail with an internal assertion -#elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)) -// G++ < 4.6 by default will continue processing the source files - even if we use #error to make -// it error out. For this reason, we use the pragma to make sure G++ aborts at the first error -// it sees. Unfortunately, that is still not our #error directive, but at least the output is -// short enough the user has a chance to see that the compiler version is not sufficient for -// the funky template mojo we use. -#pragma GCC diagnostic error "-Wfatal-errors" -#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6. -#endif - -/* Check that the compiler at least claims to support C++11. It might not be sufficient - * because the compiler may not implement it correctly, but at least we'll know. - * On the other hand, visual studio still doesn't claim to support C++11 although it's - * compliant enugh for our purpose. - */ -#if (__cplusplus <= 199711L) && (EIGEN_COMP_MSVC < 1900) -#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) -#pragma GCC diagnostic error "-Wfatal-errors" -#endif -#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.) 
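
The CXX11Meta.h helpers deleted above all follow one pattern: build a compile-time list of indices with gen_numeric_list, expand it as a parameter pack, and fold the result with a small reducer. The sketch below reimplements that pattern in isolation; the names numeric_list, gen_numeric_list and array_prod mirror the deleted internals but are standalone here, not Eigen's internal API, and a C++14 compiler is assumed so the reduction can be a plain constexpr loop.

    #include <array>
    #include <cstddef>
    #include <iostream>

    template <typename T, T... nn>
    struct numeric_list { static constexpr std::size_t count = sizeof...(nn); };

    // gen_numeric_list<int, 5>::type is numeric_list<int, 0, 1, 2, 3, 4>.
    template <typename T, std::size_t n, T start = 0, T... ii>
    struct gen_numeric_list : gen_numeric_list<T, n - 1, start, start + n - 1, ii...> {};
    template <typename T, T start, T... ii>
    struct gen_numeric_list<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; };

    // array_prod in miniature: multiply the elements selected by the index pack.
    template <typename T, std::size_t N, int... n>
    constexpr T prod_impl(const std::array<T, N>& a, numeric_list<int, n...>) {
      const T vals[] = { a[n]... };
      T p = 1;
      for (T v : vals) p *= v;
      return p;
    }

    template <typename T, std::size_t N>
    constexpr T array_prod(const std::array<T, N>& a) {
      return prod_impl(a, typename gen_numeric_list<int, N>::type());
    }

    int main() {
      constexpr std::array<int, 4> dims{{2, 3, 4, 5}};
      static_assert(array_prod(dims) == 120, "reduced at compile time");
      std::cout << array_prod(dims) << "\n";   // prints 120
    }
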
-#endif - -namespace Eigen { - -namespace internal { - -/* std::get is only constexpr in C++14, not yet in C++11 - */ - - -template<std::size_t I, class T> constexpr inline T& array_get(std::vector<T>& a) { return a[I]; } -template<std::size_t I, class T> constexpr inline T&& array_get(std::vector<T>&& a) { return a[I]; } -template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; } - -/* Suppose you have a template of the form - * template<typename T> struct X; - * And you want to specialize it in such a way: - * template<typename S1, typename... SN> struct X<Foo<S1, SN...>> { ::: }; - * template<> struct X<Foo<>> { ::: }; - * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since - * g++ can only match templates called with parameter packs if the number of template - * arguments is not a fixed size (so inside the first specialization, referencing - * X<Foo<Sn...>> will fail in g++). On the other hand, g++ will accept the following: - * template<typename S...> struct X<Foo<S...>> { ::: }: - * as an additional (!) specialization, which will then only match the empty case. - * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax, - * so we have to create a workaround for this. - */ -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) -#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) mt... n -#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_USE(n) n... -#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) , n... -#else -#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) -#define EIGEN_TPL_PP_SPEC_HACK_USE(n) -#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11WORKAROUNDS_H - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h deleted file mode 100644 index 30d3ebc..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h +++ /dev/null @@ -1,267 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EMULATE_ARRAY_H -#define EIGEN_EMULATE_ARRAY_H - - - -// The array class is only available starting with cxx11. Emulate our own here -// if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler! -// Moreover, CUDA doesn't support the STL containers, so we use our own instead. 
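
In builds where std::array is usable, the emulation above is bypassed and Eigen::array is simply an alias for std::array; either way the Tensor module uses it for dimension and shuffle lists. A minimal usage sketch, assuming a C++11 build and the usual unsupported Tensor header:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // Dimension list for a rank-3 tensor; Eigen::array is std::array here.
      Eigen::array<Eigen::Index, 3> dims{{2, 3, 4}};
      Eigen::Tensor<float, 3> t(dims[0], dims[1], dims[2]);
      t.setConstant(1.0f);

      // The same array type describes permutations: output dim i is input dim shuffle[i].
      Eigen::array<Eigen::Index, 3> shuffle{{2, 0, 1}};
      Eigen::Tensor<float, 3> s = t.shuffle(shuffle);

      std::cout << s.dimension(0) << " "
                << s.dimension(1) << " "
                << s.dimension(2) << "\n";   // 4 2 3
    }
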
-#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY) - -namespace Eigen { -template <typename T, size_t n> class array { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& front() { return values[0]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& front() const { return values[0]; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& back() { return values[n-1]; } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE - static std::size_t size() { return n; } - - T values[n]; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v) { - EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2) { - EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) { - EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, - const T& v4) { - EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5) { - EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6) { - EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7) { - EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array( - const T& v1, const T& v2, const T& v3, const T& v4, - const T& v5, const T& v6, const T& v7, const T& v8) { - EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE) - values[0] = v1; - values[1] = v2; - values[2] = v3; - values[3] = v4; - values[4] = v5; - values[5] = v6; - values[6] = v7; - values[7] = v8; - } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array(std::initializer_list<T> l) { - eigen_assert(l.size() == n); - internal::smart_copy(l.begin(), l.end(), values); - } -#endif -}; - - -// Specialize array for zero size -template <typename T> class array<T, 0> { - public: - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& operator[] (size_t) { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& operator[] (size_t) const { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - - EIGEN_DEVICE_FUNC - 
EIGEN_STRONG_INLINE T& front() { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& front() const { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE T& back() { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const T& back() const { - eigen_assert(false && "Can't index a zero size array"); - return dummy; - } - - static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE array() : dummy() { } - -#if EIGEN_HAS_VARIADIC_TEMPLATES - EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() { - eigen_assert(l.size() == 0); - } -#endif - - private: - T dummy; -}; - -// Comparison operator -// Todo: implement !=, <, <=, >, and >= -template<class T, std::size_t N> -EIGEN_DEVICE_FUNC bool operator==(const array<T,N>& lhs, const array<T,N>& rhs) { - for (std::size_t i = 0; i < N; ++i) { - if (lhs[i] != rhs[i]) { - return false; - } - } - return true; -} - - -namespace internal { -template<std::size_t I, class T, std::size_t N> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) { - return a[I]; -} -template<std::size_t I, class T, std::size_t N> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) { - return a[I]; -} - -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<array<T,N> > { - static const size_t value = N; -}; -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<array<T,N>& > { - static const size_t value = N; -}; -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<const array<T,N> > { - static const size_t value = N; -}; -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<const array<T,N>& > { - static const size_t value = N; -}; - -} // end namespace internal -} // end namespace Eigen - -#else - -// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen::array -#include <array> -namespace Eigen { - -template <typename T, std::size_t N> using array = std::array<T, N>; - -namespace internal { -/* std::get is only constexpr in C++14, not yet in C++11 - * - libstdc++ from version 4.7 onwards has it nevertheless, - * so use that - * - libstdc++ older versions: use _M_instance directly - * - libc++ all versions so far: use __elems_ directly - * - all other libs: use std::get to be portable, but - * this may not be constexpr - */ -#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322 -#define STD_GET_ARR_HACK a._M_instance[I] -#elif defined(_LIBCPP_VERSION) -#define STD_GET_ARR_HACK a.__elems_[I] -#else -#define STD_GET_ARR_HACK std::template get<I, T, N>(a) -#endif - -template<std::size_t I, class T, std::size_t N> constexpr inline T& array_get(std::array<T,N>& a) { return (T&) STD_GET_ARR_HACK; } -template<std::size_t I, class T, std::size_t N> constexpr inline T&& array_get(std::array<T,N>&& a) { return (T&&) STD_GET_ARR_HACK; } -template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; } - -#undef STD_GET_ARR_HACK - -template <typename T> struct array_size; -template<class T, std::size_t N> struct array_size<const std::array<T,N> > { - static const size_t value = N; -}; -template 
<typename T> struct array_size; -template<class T, std::size_t N> struct array_size<std::array<T,N> > { - static const size_t value = N; -}; -} // end namespace internal -} // end namespace Eigen - -#endif - -#endif // EIGEN_EMULATE_ARRAY_H diff --git a/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h b/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h deleted file mode 100644 index 8a536fa..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h +++ /dev/null @@ -1,311 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EMULATE_CXX11_META_H -#define EIGEN_EMULATE_CXX11_META_H - - - -namespace Eigen { - -namespace internal { - -/** \internal - * \file CXX11/util/EmulateCXX11Meta.h - * This file emulates a subset of the functionality provided by CXXMeta.h for - * compilers that don't yet support cxx11 such as nvcc. - */ - -struct empty_list { static const std::size_t count = 0; }; - -template<typename T, typename Tail=empty_list> struct type_list { - typedef T HeadType; - typedef Tail TailType; - static const T head; - static const Tail tail; - static const std::size_t count = 1 + Tail::count; -}; - -struct null_type { }; - -template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type, - typename T4 = null_type, typename T5 = null_type, typename T6 = null_type, - typename T7 = null_type, typename T8 = null_type> -struct make_type_list { - typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult; - - typedef type_list<T1, tailresult> type; -}; - -template<> struct make_type_list<> { - typedef empty_list type; -}; - - -template <std::size_t index, class TList> struct get_type; - -template <class Head, class Tail> -struct get_type<0, type_list<Head, Tail> > -{ - typedef Head type; -}; - -template <std::size_t i, class Head, class Tail> -struct get_type<i, type_list<Head, Tail> > -{ - typedef typename get_type<i-1, Tail>::type type; -}; - - -/* numeric list */ -template <typename T, T n> -struct type2val { - typedef T type; - static const T value = n; -}; - - -template<typename T, size_t n, T V> struct gen_numeric_list_repeated; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 1, V> { - typedef typename make_type_list<type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 2, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 3, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 4, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, - type2val<T, V>, type2val<T, V>, type2val<T, V> >::type 
type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, - type2val<T, V>, type2val<T, V>, type2val<T, V>, - type2val<T, V> >::type type; -}; - -template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> { - typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, - type2val<T, V>, type2val<T, V>, type2val<T, V>, - type2val<T, V>, type2val<T, V> >::type type; -}; - - -template <std::size_t index, class NList> struct get; - -template <std::size_t i> -struct get<i, empty_list> -{ - get() { eigen_assert(false && "index overflow"); } - typedef void type; - static const char value = '\0'; -}; - -template <std::size_t i, class Head> -struct get<i, type_list<Head, empty_list> > -{ - get() { eigen_assert(false && "index overflow"); } - typedef void type; - static const char value = '\0'; -}; - -template <class Head> -struct get<0, type_list<Head, empty_list> > -{ - typedef typename Head::type type; - static const type value = Head::value; -}; - -template <class Head, class Tail> -struct get<0, type_list<Head, Tail> > -{ - typedef typename Head::type type; - static const type value = Head::value; -}; - -template <std::size_t i, class Head, class Tail> -struct get<i, type_list<Head, Tail> > -{ - typedef typename Tail::HeadType::type type; - static const type value = get<i-1, Tail>::value; -}; - - -template <class NList> struct arg_prod { - static const typename NList::HeadType::type value = get<0, NList>::value * arg_prod<typename NList::TailType>::value; -}; -template <> struct arg_prod<empty_list> { - static const int value = 1; -}; - - -template<int n, typename t> -array<t, n> repeat(t v) { - array<t, n> array; - array.fill(v); - return array; -} - -template<std::size_t I, class Head, class Tail> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>&) { - return get<I, type_list<Head, Tail> >::value; -} -template<std::size_t I, class Head, class Tail> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>&) { - return get<I, type_list<Head, Tail> >::value; -} - -template <class NList> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) { - return arg_prod<NList>::value; -} - -template<typename t, std::size_t n> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) { - t prod = 1; - for (size_t i = 0; i < n; ++i) { prod *= a[i]; } - return prod; -} -template<typename t> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, 0>& /*a*/) { - return 1; -} - -template<typename t> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) { - eigen_assert(a.size() > 0); - t prod = 1; - for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; } - return prod; -} - - -template<std::size_t I, class T> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) { - return a[I]; -} -template<std::size_t I, class T> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) { - return a[I]; -} - -struct sum_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a + b; } -}; -struct product_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a * b; } -}; - -struct logical_and_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a && b; } -}; -struct logical_or_op { - template<typename A, 
typename B> static inline bool run(A a, B b) { return a || b; } -}; - -struct equal_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a == b; } -}; -struct not_equal_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a != b; } -}; -struct lesser_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a < b; } -}; -struct lesser_equal_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a <= b; } -}; - -struct greater_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a > b; } -}; -struct greater_equal_op { - template<typename A, typename B> static inline bool run(A a, B b) { return a >= b; } -}; - -struct not_op { - template<typename A> static inline bool run(A a) { return !a; } -}; -struct negation_op { - template<typename A> static inline bool run(A a) { return -a; } -}; -struct greater_equal_zero_op { - template<typename A> static inline bool run(A a) { return a >= 0; } -}; - - -template<typename Reducer, typename Op, typename A, std::size_t N> -struct ArrayApplyAndReduce { - static inline bool run(const array<A, N>& a) { - EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); - bool result = Reducer::run(Op::run(a[0]), Op::run(a[1])); - for (size_t i = 2; i < N; ++i) { - result = Reducer::run(result, Op::run(a[i])); - } - return result; - } -}; - -template<typename Reducer, typename Op, typename A> -struct ArrayApplyAndReduce<Reducer, Op, A, 1> { - static inline bool run(const array<A, 1>& a) { - return Op::run(a[0]); - } -}; - -template<typename Reducer, typename Op, typename A, std::size_t N> -inline bool array_apply_and_reduce(const array<A, N>& a) { - return ArrayApplyAndReduce<Reducer, Op, A, N>::run(a); -} - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N> -struct ArrayZipAndReduce { - static inline bool run(const array<A, N>& a, const array<B, N>& b) { - EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE); - bool result = Reducer::run(Op::run(a[0], b[0]), Op::run(a[1], b[1])); - for (size_t i = 2; i < N; ++i) { - result = Reducer::run(result, Op::run(a[i], b[i])); - } - return result; - } -}; - -template<typename Reducer, typename Op, typename A, typename B> -struct ArrayZipAndReduce<Reducer, Op, A, B, 1> { - static inline bool run(const array<A, 1>& a, const array<B, 1>& b) { - return Op::run(a[0], b[0]); - } -}; - -template<typename Reducer, typename Op, typename A, typename B, std::size_t N> -inline bool array_zip_and_reduce(const array<A, N>& a, const array<B, N>& b) { - return ArrayZipAndReduce<Reducer, Op, A, B, N>::run(a, b); -} - -} // end namespace internal - -} // end namespace Eigen - - - -#endif // EIGEN_EMULATE_CXX11_META_H diff --git a/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h deleted file mode 100644 index 4bc3dd1..0000000 --- a/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +++ /dev/null @@ -1,141 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
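
EmulateCXX11Meta.h reproduces the apply-and-reduce and zip-and-reduce helpers with plain loops so that pre-C++11 compilers and nvcc can still evaluate them. The standalone sketch below shows the pattern; logical_and_op, greater_equal_zero_op and array_apply_and_reduce are reimplemented here for illustration, not taken from Eigen's internal namespace.

    #include <array>
    #include <cstddef>
    #include <iostream>

    struct logical_and_op {
      static bool run(bool a, bool b) { return a && b; }
    };
    struct greater_equal_zero_op {
      template <typename A> static bool run(A a) { return a >= 0; }
    };

    // Apply Op to every element, then fold the booleans with Reducer.
    template <typename Reducer, typename Op, typename A, std::size_t N>
    bool array_apply_and_reduce(const std::array<A, N>& a) {
      bool result = Op::run(a[0]);
      for (std::size_t i = 1; i < N; ++i) result = Reducer::run(result, Op::run(a[i]));
      return result;
    }

    int main() {
      std::array<int, 4> sizes{{3, 5, 0, 7}};
      // "Are all entries non-negative?", the kind of sanity check run on dimension arrays.
      std::cout << std::boolalpha
                << array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(sizes)
                << "\n";   // true
    }
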
- -#ifndef EIGEN_FIXEDSIZEVECTOR_H -#define EIGEN_FIXEDSIZEVECTOR_H - -namespace Eigen { - -/** \class MaxSizeVector - * \ingroup Core - * - * \brief The MaxSizeVector class. - * - * The %MaxSizeVector provides a subset of std::vector functionality. - * - * The goal is to provide basic std::vector operations when using - * std::vector is not an option (e.g. on GPU or when compiling using - * FMA/AVX, as this can cause either compilation failures or illegal - * instruction failures). - * - * Beware: The constructors are not API compatible with these of - * std::vector. - */ -template <typename T> -class MaxSizeVector { - public: - // Construct a new MaxSizeVector, reserve n elements. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit MaxSizeVector(size_t n) - : reserve_(n), size_(0), - data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; } - } - - // Construct a new MaxSizeVector, reserve and resize to n. - // Copy the init value to all elements. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - MaxSizeVector(size_t n, const T& init) - : reserve_(n), size_(n), - data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - ~MaxSizeVector() { - for (size_t i = 0; i < size_; ++i) { - data_[i].~T(); - } - internal::aligned_free(data_); - } - - void resize(size_t n) { - eigen_assert(n <= reserve_); - for (size_t i = size_; i < n; ++i) { - new (&data_[i]) T; - } - for (size_t i = n; i < size_; ++i) { - data_[i].~T(); - } - size_ = n; - } - - // Append new elements (up to reserved size). - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void push_back(const T& t) { - eigen_assert(size_ < reserve_); - data_[size_++] = t; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T& operator[] (size_t i) const { - eigen_assert(i < size_); - return data_[i]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T& operator[] (size_t i) { - eigen_assert(i < size_); - return data_[i]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T& back() { - eigen_assert(size_ > 0); - return data_[size_ - 1]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T& back() const { - eigen_assert(size_ > 0); - return data_[size_ - 1]; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - void pop_back() { - // NOTE: This does not destroy the value at the end the way - // std::vector's version of pop_back() does. That happens when - // the Vector is destroyed. 
- eigen_assert(size_ > 0); - size_--; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t size() const { return size_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - bool empty() const { return size_ == 0; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* data() { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* data() const { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* begin() { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - T* end() { return data_ + size_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* begin() const { return data_; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const T* end() const { return data_ + size_; } - - private: - size_t reserve_; - size_t size_; - T* data_; -}; - -} // namespace Eigen - -#endif // EIGEN_FIXEDSIZEVECTOR_H diff --git a/eigen/unsupported/Eigen/EulerAngles b/eigen/unsupported/Eigen/EulerAngles deleted file mode 100644 index 521fa3f..0000000 --- a/eigen/unsupported/Eigen/EulerAngles +++ /dev/null @@ -1,43 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EULERANGLES_MODULE_H -#define EIGEN_EULERANGLES_MODULE_H - - -#include "Eigen/Core" -#include "Eigen/Geometry" - -#include "Eigen/src/Core/util/DisableStupidWarnings.h" - -namespace Eigen { - -/** - * \defgroup EulerAngles_Module EulerAngles module - * \brief This module provides generic euler angles rotation. - * - * Euler angles are a way to represent 3D rotation. - * - * In order to use this module in your code, include this header: - * \code - * #include <unsupported/Eigen/EulerAngles> - * \endcode - * - * See \ref EulerAngles for more information. - * - */ - -} - -#include "src/EulerAngles/EulerSystem.h" -#include "src/EulerAngles/EulerAngles.h" - -#include "Eigen/src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_EULERANGLES_MODULE_H diff --git a/eigen/unsupported/Eigen/FFT b/eigen/unsupported/Eigen/FFT deleted file mode 100644 index d8cf3e6..0000000 --- a/eigen/unsupported/Eigen/FFT +++ /dev/null @@ -1,419 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Mark Borgerding mark a borgerding net -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_FFT_H -#define EIGEN_FFT_H - -#include <complex> -#include <vector> -#include <map> -#include <Eigen/Core> - - -/** - * \defgroup FFT_Module Fast Fourier Transform module - * - * \code - * #include <unsupported/Eigen/FFT> - * \endcode - * - * This module provides Fast Fourier transformation, with a configurable backend - * implementation. - * - * The default implementation is based on kissfft. It is a small, free, and - * reasonably efficient default. - * - * There are currently two implementation backend: - * - * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size. - * - MKL (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form. 
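
MaxSizeVector, defined above, is the fixed-capacity stand-in for std::vector that the thread pool code relies on. A small usage sketch, assuming the type is reachable through the unsupported CXX11 ThreadPool header as in this source tree:

    #include <unsupported/Eigen/CXX11/ThreadPool>
    #include <iostream>

    int main() {
      Eigen::MaxSizeVector<int> v(4);   // capacity 4, size 0; the capacity never grows
      v.push_back(10);
      v.push_back(20);
      v.push_back(30);
      std::cout << v.size() << " " << v.back() << "\n";   // 3 30
      v.pop_back();   // shrinks the size; the element is destroyed later, as noted above
      std::cout << v.size() << "\n";                      // 2
    }
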
- * - * \section FFTDesign Design - * - * The following design decisions were made concerning scaling and - * half-spectrum for real FFT. - * - * The intent is to facilitate generic programming and ease migrating code - * from Matlab/octave. - * We think the default behavior of Eigen/FFT should favor correctness and - * generality over speed. Of course, the caller should be able to "opt-out" from this - * behavior and get the speed increase if they want it. - * - * 1) %Scaling: - * Other libraries (FFTW,IMKL,KISSFFT) do not perform scaling, so there - * is a constant gain incurred after the forward&inverse transforms , so - * IFFT(FFT(x)) = Kx; this is done to avoid a vector-by-value multiply. - * The downside is that algorithms that worked correctly in Matlab/octave - * don't behave the same way once implemented in C++. - * - * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x. - * - * 2) Real FFT half-spectrum - * Other libraries use only half the frequency spectrum (plus one extra - * sample for the Nyquist bin) for a real FFT, the other half is the - * conjugate-symmetric of the first half. This saves them a copy and some - * memory. The downside is the caller needs to have special logic for the - * number of bins in complex vs real. - * - * How Eigen/FFT differs: The full spectrum is returned from the forward - * transform. This facilitates generic template programming by obviating - * separate specializations for real vs complex. On the inverse - * transform, only half the spectrum is actually used if the output type is real. - */ - - -#ifdef EIGEN_FFTW_DEFAULT -// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -# include <fftw3.h> -# include "src/FFT/ei_fftw_impl.h" - namespace Eigen { - //template <typename T> typedef struct internal::fftw_impl default_fft_impl; this does not work - template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {}; - } -#elif defined EIGEN_MKL_DEFAULT -// TODO -// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form -# include "src/FFT/ei_imklfft_impl.h" - namespace Eigen { - template <typename T> struct default_fft_impl : public internal::imklfft_impl {}; - } -#else -// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft -// -# include "src/FFT/ei_kissfft_impl.h" - namespace Eigen { - template <typename T> - struct default_fft_impl : public internal::kissfft_impl<T> {}; - } -#endif - -namespace Eigen { - - -// -template<typename T_SrcMat,typename T_FftIfc> struct fft_fwd_proxy; -template<typename T_SrcMat,typename T_FftIfc> struct fft_inv_proxy; - -namespace internal { -template<typename T_SrcMat,typename T_FftIfc> -struct traits< fft_fwd_proxy<T_SrcMat,T_FftIfc> > -{ - typedef typename T_SrcMat::PlainObject ReturnType; -}; -template<typename T_SrcMat,typename T_FftIfc> -struct traits< fft_inv_proxy<T_SrcMat,T_FftIfc> > -{ - typedef typename T_SrcMat::PlainObject ReturnType; -}; -} - -template<typename T_SrcMat,typename T_FftIfc> -struct fft_fwd_proxy - : public ReturnByValue<fft_fwd_proxy<T_SrcMat,T_FftIfc> > -{ - typedef DenseIndex Index; - - fft_fwd_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} - - template<typename T_DestMat> void evalTo(T_DestMat& dst) const; - - Index rows() const { return m_src.rows(); } - Index cols() const { return m_src.cols(); } -protected: - const T_SrcMat & m_src; - T_FftIfc & m_ifc; - Index m_nfft; -private: - fft_fwd_proxy& 
operator=(const fft_fwd_proxy&); -}; - -template<typename T_SrcMat,typename T_FftIfc> -struct fft_inv_proxy - : public ReturnByValue<fft_inv_proxy<T_SrcMat,T_FftIfc> > -{ - typedef DenseIndex Index; - - fft_inv_proxy(const T_SrcMat& src,T_FftIfc & fft, Index nfft) : m_src(src),m_ifc(fft), m_nfft(nfft) {} - - template<typename T_DestMat> void evalTo(T_DestMat& dst) const; - - Index rows() const { return m_src.rows(); } - Index cols() const { return m_src.cols(); } -protected: - const T_SrcMat & m_src; - T_FftIfc & m_ifc; - Index m_nfft; -private: - fft_inv_proxy& operator=(const fft_inv_proxy&); -}; - - -template <typename T_Scalar, - typename T_Impl=default_fft_impl<T_Scalar> > -class FFT -{ - public: - typedef T_Impl impl_type; - typedef DenseIndex Index; - typedef typename impl_type::Scalar Scalar; - typedef typename impl_type::Complex Complex; - - enum Flag { - Default=0, // goof proof - Unscaled=1, - HalfSpectrum=2, - // SomeOtherSpeedOptimization=4 - Speedy=32767 - }; - - FFT( const impl_type & impl=impl_type() , Flag flags=Default ) :m_impl(impl),m_flag(flags) { } - - inline - bool HasFlag(Flag f) const { return (m_flag & (int)f) == f;} - - inline - void SetFlag(Flag f) { m_flag |= (int)f;} - - inline - void ClearFlag(Flag f) { m_flag &= (~(int)f);} - - inline - void fwd( Complex * dst, const Scalar * src, Index nfft) - { - m_impl.fwd(dst,src,static_cast<int>(nfft)); - if ( HasFlag(HalfSpectrum) == false) - ReflectSpectrum(dst,nfft); - } - - inline - void fwd( Complex * dst, const Complex * src, Index nfft) - { - m_impl.fwd(dst,src,static_cast<int>(nfft)); - } - - /* - inline - void fwd2(Complex * dst, const Complex * src, int n0,int n1) - { - m_impl.fwd2(dst,src,n0,n1); - } - */ - - template <typename _Input> - inline - void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) - { - if ( NumTraits<_Input>::IsComplex == 0 && HasFlag(HalfSpectrum) ) - dst.resize( (src.size()>>1)+1); // half the bins + Nyquist bin - else - dst.resize(src.size()); - fwd(&dst[0],&src[0],src.size()); - } - - template<typename InputDerived, typename ComplexDerived> - inline - void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src, Index nfft=-1) - { - typedef typename ComplexDerived::Scalar dst_type; - typedef typename InputDerived::Scalar src_type; - EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,InputDerived) // size at compile-time - EIGEN_STATIC_ASSERT((internal::is_same<dst_type, Complex>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - - if (nfft<1) - nfft = src.size(); - - if ( NumTraits< src_type >::IsComplex == 0 && HasFlag(HalfSpectrum) ) - dst.derived().resize( (nfft>>1)+1); - else - dst.derived().resize(nfft); - - if ( src.innerStride() != 1 || src.size() < nfft ) { - Matrix<src_type,1,Dynamic> tmp; - if (src.size()<nfft) { - tmp.setZero(nfft); - tmp.block(0,0,src.size(),1 ) = src; - }else{ - tmp = src; - } - fwd( &dst[0],&tmp[0],nfft ); - }else{ - fwd( &dst[0],&src[0],nfft ); - } - } - - template<typename InputDerived> - inline - fft_fwd_proxy< MatrixBase<InputDerived>, FFT<T_Scalar,T_Impl> > - fwd( const MatrixBase<InputDerived> & src, Index nfft=-1) - { - return fft_fwd_proxy< 
MatrixBase<InputDerived> ,FFT<T_Scalar,T_Impl> >( src, *this,nfft ); - } - - template<typename InputDerived> - inline - fft_inv_proxy< MatrixBase<InputDerived>, FFT<T_Scalar,T_Impl> > - inv( const MatrixBase<InputDerived> & src, Index nfft=-1) - { - return fft_inv_proxy< MatrixBase<InputDerived> ,FFT<T_Scalar,T_Impl> >( src, *this,nfft ); - } - - inline - void inv( Complex * dst, const Complex * src, Index nfft) - { - m_impl.inv( dst,src,static_cast<int>(nfft) ); - if ( HasFlag( Unscaled ) == false) - scale(dst,Scalar(1./nfft),nfft); // scale the time series - } - - inline - void inv( Scalar * dst, const Complex * src, Index nfft) - { - m_impl.inv( dst,src,static_cast<int>(nfft) ); - if ( HasFlag( Unscaled ) == false) - scale(dst,Scalar(1./nfft),nfft); // scale the time series - } - - template<typename OutputDerived, typename ComplexDerived> - inline - void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1) - { - typedef typename ComplexDerived::Scalar src_type; - typedef typename ComplexDerived::RealScalar real_type; - typedef typename OutputDerived::Scalar dst_type; - const bool realfft= (NumTraits<dst_type>::IsComplex == 0); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time - EIGEN_STATIC_ASSERT((internal::is_same<src_type, Complex>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES) - - if (nfft<1) { //automatic FFT size determination - if ( realfft && HasFlag(HalfSpectrum) ) - nfft = 2*(src.size()-1); //assume even fft size - else - nfft = src.size(); - } - dst.derived().resize( nfft ); - - // check for nfft that does not fit the input data size - Index resize_input= ( realfft && HasFlag(HalfSpectrum) ) - ? ( (nfft/2+1) - src.size() ) - : ( nfft - src.size() ); - - if ( src.innerStride() != 1 || resize_input ) { - // if the vector is strided, then we need to copy it to a packed temporary - Matrix<src_type,1,Dynamic> tmp; - if ( resize_input ) { - size_t ncopy = (std::min)(src.size(),src.size() + resize_input); - tmp.setZero(src.size() + resize_input); - if ( realfft && HasFlag(HalfSpectrum) ) { - // pad at the Nyquist bin - tmp.head(ncopy) = src.head(ncopy); - tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin - }else{ - size_t nhead,ntail; - nhead = 1+ncopy/2-1; // range [0:pi) - ntail = ncopy/2-1; // range (-pi:0) - tmp.head(nhead) = src.head(nhead); - tmp.tail(ntail) = src.tail(ntail); - if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it - tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5); - }else{ // expanding -- split the old Nyquist bin into two halves - tmp(nhead) = src(nhead) * real_type(.5); - tmp(tmp.size()-nhead) = tmp(nhead); - } - } - }else{ - tmp = src; - } - inv( &dst[0],&tmp[0], nfft); - }else{ - inv( &dst[0],&src[0], nfft); - } - } - - template <typename _Output> - inline - void inv( std::vector<_Output> & dst, const std::vector<Complex> & src,Index nfft=-1) - { - if (nfft<1) - nfft = ( NumTraits<_Output>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 
2*(src.size()-1) : src.size(); - dst.resize( nfft ); - inv( &dst[0],&src[0],nfft); - } - - - /* - // TODO: multi-dimensional FFTs - inline - void inv2(Complex * dst, const Complex * src, int n0,int n1) - { - m_impl.inv2(dst,src,n0,n1); - if ( HasFlag( Unscaled ) == false) - scale(dst,1./(n0*n1),n0*n1); - } - */ - - inline - impl_type & impl() {return m_impl;} - private: - - template <typename T_Data> - inline - void scale(T_Data * x,Scalar s,Index nx) - { -#if 1 - for (int k=0;k<nx;++k) - *x++ *= s; -#else - if ( ((ptrdiff_t)x) & 15 ) - Matrix<T_Data, Dynamic, 1>::Map(x,nx) *= s; - else - Matrix<T_Data, Dynamic, 1>::MapAligned(x,nx) *= s; - //Matrix<T_Data, Dynamic, Dynamic>::Map(x,nx) * s; -#endif - } - - inline - void ReflectSpectrum(Complex * freq, Index nfft) - { - // create the implicit right-half spectrum (conjugate-mirror of the left-half) - Index nhbins=(nfft>>1)+1; - for (Index k=nhbins;k < nfft; ++k ) - freq[k] = conj(freq[nfft-k]); - } - - impl_type m_impl; - int m_flag; -}; - -template<typename T_SrcMat,typename T_FftIfc> -template<typename T_DestMat> inline -void fft_fwd_proxy<T_SrcMat,T_FftIfc>::evalTo(T_DestMat& dst) const -{ - m_ifc.fwd( dst, m_src, m_nfft); -} - -template<typename T_SrcMat,typename T_FftIfc> -template<typename T_DestMat> inline -void fft_inv_proxy<T_SrcMat,T_FftIfc>::evalTo(T_DestMat& dst) const -{ - m_ifc.inv( dst, m_src, m_nfft); -} - -} -#endif -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/eigen/unsupported/Eigen/IterativeSolvers b/eigen/unsupported/Eigen/IterativeSolvers deleted file mode 100644 index 31e880b..0000000 --- a/eigen/unsupported/Eigen/IterativeSolvers +++ /dev/null @@ -1,42 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ITERATIVE_SOLVERS_MODULE_H -#define EIGEN_ITERATIVE_SOLVERS_MODULE_H - -#include <Eigen/Sparse> - -/** - * \defgroup IterativeSolvers_Module Iterative solvers module - * This module aims to provide various iterative linear and non linear solver algorithms. - * It currently provides: - * - a constrained conjugate gradient - * - a Householder GMRES implementation - * \code - * #include <unsupported/Eigen/IterativeSolvers> - * \endcode - */ -//@{ - -#ifndef EIGEN_MPL2_ONLY -#include "src/IterativeSolvers/IterationController.h" -#include "src/IterativeSolvers/ConstrainedConjGrad.h" -#endif - -#include "src/IterativeSolvers/IncompleteLU.h" -#include "../../Eigen/Jacobi" -#include "../../Eigen/Householder" -#include "src/IterativeSolvers/GMRES.h" -#include "src/IterativeSolvers/DGMRES.h" -//#include "src/IterativeSolvers/SSORPreconditioner.h" -#include "src/IterativeSolvers/MINRES.h" - -//@} - -#endif // EIGEN_ITERATIVE_SOLVERS_MODULE_H diff --git a/eigen/unsupported/Eigen/KroneckerProduct b/eigen/unsupported/Eigen/KroneckerProduct deleted file mode 100644 index 5f5afb8..0000000 --- a/eigen/unsupported/Eigen/KroneckerProduct +++ /dev/null @@ -1,36 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
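Before the KroneckerProduct header resumes below, here is a minimal usage sketch of the FFT front-end whose implementation ends just above. It exercises the correctness-first defaults described in the Design section (invertible scaling, full spectrum) and the opt-out flags; the include path follows the usual unsupported/ convention and the signal values are purely illustrative.

\code
#include <unsupported/Eigen/FFT>
#include <complex>
#include <iostream>
#include <vector>

int main()
{
  Eigen::FFT<float> fft;

  std::vector<float> timevec(8, 1.f);            // real input signal
  std::vector<std::complex<float> > freqvec;

  fft.fwd(freqvec, timevec);                     // full spectrum by default
  fft.inv(timevec, freqvec);                     // scaled so that ifft(fft(x)) == x

  // Opt out of the defaults when speed matters:
  fft.SetFlag(Eigen::FFT<float>::HalfSpectrum);  // keep only nfft/2+1 bins for real input
  fft.SetFlag(Eigen::FFT<float>::Unscaled);      // skip the 1/nfft scaling in inv()
  fft.fwd(freqvec, timevec);
  std::cout << freqvec.size() << " bins" << std::endl;
  return 0;
}
\endcode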
- -#ifndef EIGEN_KRONECKER_PRODUCT_MODULE_H -#define EIGEN_KRONECKER_PRODUCT_MODULE_H - -#include "../../Eigen/Core" - -#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" - -#include "../../Eigen/src/SparseCore/SparseUtil.h" - -namespace Eigen { - -/** - * \defgroup KroneckerProduct_Module KroneckerProduct module - * - * This module contains an experimental Kronecker product implementation. - * - * \code - * #include <Eigen/KroneckerProduct> - * \endcode - */ - -} // namespace Eigen - -#include "src/KroneckerProduct/KroneckerTensorProduct.h" - -#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_KRONECKER_PRODUCT_MODULE_H diff --git a/eigen/unsupported/Eigen/LevenbergMarquardt b/eigen/unsupported/Eigen/LevenbergMarquardt deleted file mode 100644 index 0fe2680..0000000 --- a/eigen/unsupported/Eigen/LevenbergMarquardt +++ /dev/null @@ -1,45 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LEVENBERGMARQUARDT_MODULE -#define EIGEN_LEVENBERGMARQUARDT_MODULE - -// #include <vector> - -#include <Eigen/Core> -#include <Eigen/Jacobi> -#include <Eigen/QR> -#include <unsupported/Eigen/NumericalDiff> - -#include <Eigen/SparseQR> - -/** - * \defgroup LevenbergMarquardt_Module Levenberg-Marquardt module - * - * \code - * #include </Eigen/LevenbergMarquardt> - * \endcode - * - * - */ - -#include "Eigen/SparseCore" -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#include "src/LevenbergMarquardt/LMqrsolv.h" -#include "src/LevenbergMarquardt/LMcovar.h" -#include "src/LevenbergMarquardt/LMpar.h" - -#endif - -#include "src/LevenbergMarquardt/LevenbergMarquardt.h" -#include "src/LevenbergMarquardt/LMonestep.h" - - -#endif // EIGEN_LEVENBERGMARQUARDT_MODULE diff --git a/eigen/unsupported/Eigen/MPRealSupport b/eigen/unsupported/Eigen/MPRealSupport deleted file mode 100644 index 7f0b70c..0000000 --- a/eigen/unsupported/Eigen/MPRealSupport +++ /dev/null @@ -1,209 +0,0 @@ -// This file is part of a joint effort between Eigen, a lightweight C++ template library -// for linear algebra, and MPFR C++, a C++ interface to MPFR library (http://www.holoborodko.com/pavel/) -// -// Copyright (C) 2010-2012 Pavel Holoborodko <pavel@holoborodko.com> -// Copyright (C) 2010 Konstantin Holoborodko <konstantin@holoborodko.com> -// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MPREALSUPPORT_MODULE_H -#define EIGEN_MPREALSUPPORT_MODULE_H - -#include <Eigen/Core> -#include <mpreal.h> - -namespace Eigen { - -/** - * \defgroup MPRealSupport_Module MPFRC++ Support module - * \code - * #include <Eigen/MPRealSupport> - * \endcode - * - * This module provides support for multi precision floating point numbers - * via the <a href="http://www.holoborodko.com/pavel/mpfr">MPFR C++</a> - * library which itself is built upon <a href="http://www.mpfr.org/">MPFR</a>/<a href="http://gmplib.org/">GMP</a>. - * - * \warning MPFR C++ is licensed under the GPL. 
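The KroneckerProduct module deleted just above is driven by the free function kroneckerProduct() that its KroneckerTensorProduct.h include provides. A short hedged sketch; the include path assumes unsupported/ is on the include path, and the 2x2 operands are illustrative.

\code
#include <unsupported/Eigen/KroneckerProduct>
#include <iostream>

int main()
{
  Eigen::Matrix2d A, B;
  A << 1, 2,
       3, 4;
  B << 0, 1,
       1, 0;

  // C is the 4x4 block matrix whose (i,j) block equals A(i,j) * B
  Eigen::Matrix4d C = Eigen::kroneckerProduct(A, B);
  std::cout << C << std::endl;
  return 0;
}
\endcode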
- * - * You can find a copy of MPFR C++ that is known to be compatible in the unsupported/test/mpreal folder. - * - * Here is an example: - * -\code -#include <iostream> -#include <Eigen/MPRealSupport> -#include <Eigen/LU> -using namespace mpfr; -using namespace Eigen; -int main() -{ - // set precision to 256 bits (double has only 53 bits) - mpreal::set_default_prec(256); - // Declare matrix and vector types with multi-precision scalar type - typedef Matrix<mpreal,Dynamic,Dynamic> MatrixXmp; - typedef Matrix<mpreal,Dynamic,1> VectorXmp; - - MatrixXmp A = MatrixXmp::Random(100,100); - VectorXmp b = VectorXmp::Random(100); - - // Solve Ax=b using LU - VectorXmp x = A.lu().solve(b); - std::cout << "relative error: " << (A*x - b).norm() / b.norm() << std::endl; - return 0; -} -\endcode - * - */ - - template<> struct NumTraits<mpfr::mpreal> - : GenericNumTraits<mpfr::mpreal> - { - enum { - IsInteger = 0, - IsSigned = 1, - IsComplex = 0, - RequireInitialization = 1, - ReadCost = HugeCost, - AddCost = HugeCost, - MulCost = HugeCost - }; - - typedef mpfr::mpreal Real; - typedef mpfr::mpreal NonInteger; - - static inline Real highest (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(Precision); } - static inline Real lowest (long Precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(Precision); } - - // Constants - static inline Real Pi (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_pi(Precision); } - static inline Real Euler (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_euler(Precision); } - static inline Real Log2 (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_log2(Precision); } - static inline Real Catalan (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_catalan(Precision); } - - static inline Real epsilon (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(Precision); } - static inline Real epsilon (const Real& x) { return mpfr::machine_epsilon(x); } - -#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS - static inline int digits10 (long Precision = mpfr::mpreal::get_default_prec()) { return std::numeric_limits<Real>::digits10(Precision); } - static inline int digits10 (const Real& x) { return std::numeric_limits<Real>::digits10(x); } -#endif - - static inline Real dummy_precision() - { - mpfr_prec_t weak_prec = ((mpfr::mpreal::get_default_prec()-1) * 90) / 100; - return mpfr::machine_epsilon(weak_prec); - } - }; - - namespace internal { - - template<> inline mpfr::mpreal random<mpfr::mpreal>() - { - return mpfr::random(); - } - - template<> inline mpfr::mpreal random<mpfr::mpreal>(const mpfr::mpreal& a, const mpfr::mpreal& b) - { - return a + (b-a) * random<mpfr::mpreal>(); - } - - inline bool isMuchSmallerThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) - { - return mpfr::abs(a) <= mpfr::abs(b) * eps; - } - - inline bool isApprox(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) - { - return mpfr::isEqualFuzzy(a,b,eps); - } - - inline bool isApproxOrLessThan(const mpfr::mpreal& a, const mpfr::mpreal& b, const mpfr::mpreal& eps) - { - return a <= b || mpfr::isEqualFuzzy(a,b,eps); - } - - template<> inline long double cast<mpfr::mpreal,long double>(const mpfr::mpreal& x) - { return x.toLDouble(); } - - template<> inline double cast<mpfr::mpreal,double>(const mpfr::mpreal& x) - { return x.toDouble(); } - - template<> inline long cast<mpfr::mpreal,long>(const mpfr::mpreal& x) - { 
return x.toLong(); } - - template<> inline int cast<mpfr::mpreal,int>(const mpfr::mpreal& x) - { return int(x.toLong()); } - - // Specialize GEBP kernel and traits for mpreal (no need for peeling, nor complicated stuff) - // This also permits to directly call mpfr's routines and avoid many temporaries produced by mpreal - template<> - class gebp_traits<mpfr::mpreal, mpfr::mpreal, false, false> - { - public: - typedef mpfr::mpreal ResScalar; - enum { - Vectorizable = false, - LhsPacketSize = 1, - RhsPacketSize = 1, - ResPacketSize = 1, - NumberOfRegisters = 1, - nr = 1, - mr = 1, - LhsProgress = 1, - RhsProgress = 1 - }; - typedef ResScalar LhsPacket; - typedef ResScalar RhsPacket; - typedef ResScalar ResPacket; - - }; - - - - template<typename Index, typename DataMapper, bool ConjugateLhs, bool ConjugateRhs> - struct gebp_kernel<mpfr::mpreal,mpfr::mpreal,Index,DataMapper,1,1,ConjugateLhs,ConjugateRhs> - { - typedef mpfr::mpreal mpreal; - - EIGEN_DONT_INLINE - void operator()(const DataMapper& res, const mpreal* blockA, const mpreal* blockB, - Index rows, Index depth, Index cols, const mpreal& alpha, - Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0) - { - if(rows==0 || cols==0 || depth==0) - return; - - mpreal acc1(0,mpfr_get_prec(blockA[0].mpfr_srcptr())), - tmp (0,mpfr_get_prec(blockA[0].mpfr_srcptr())); - - if(strideA==-1) strideA = depth; - if(strideB==-1) strideB = depth; - - for(Index i=0; i<rows; ++i) - { - for(Index j=0; j<cols; ++j) - { - const mpreal *A = blockA + i*strideA + offsetA; - const mpreal *B = blockB + j*strideB + offsetB; - - acc1 = 0; - for(Index k=0; k<depth; k++) - { - mpfr_mul(tmp.mpfr_ptr(), A[k].mpfr_srcptr(), B[k].mpfr_srcptr(), mpreal::get_default_rnd()); - mpfr_add(acc1.mpfr_ptr(), acc1.mpfr_ptr(), tmp.mpfr_ptr(), mpreal::get_default_rnd()); - } - - mpfr_mul(acc1.mpfr_ptr(), acc1.mpfr_srcptr(), alpha.mpfr_srcptr(), mpreal::get_default_rnd()); - mpfr_add(res(i,j).mpfr_ptr(), res(i,j).mpfr_srcptr(), acc1.mpfr_srcptr(), mpreal::get_default_rnd()); - } - } - } - }; - } // end namespace internal -} - -#endif // EIGEN_MPREALSUPPORT_MODULE_H diff --git a/eigen/unsupported/Eigen/MatrixFunctions b/eigen/unsupported/Eigen/MatrixFunctions deleted file mode 100644 index 60dc0a6..0000000 --- a/eigen/unsupported/Eigen/MatrixFunctions +++ /dev/null @@ -1,500 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Jitse Niesen <jitse@maths.leeds.ac.uk> -// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_FUNCTIONS -#define EIGEN_MATRIX_FUNCTIONS - -#include <cfloat> -#include <list> - -#include <Eigen/Core> -#include <Eigen/LU> -#include <Eigen/Eigenvalues> - -/** - * \defgroup MatrixFunctions_Module Matrix functions module - * \brief This module aims to provide various methods for the computation of - * matrix functions. - * - * To use this module, add - * \code - * #include <unsupported/Eigen/MatrixFunctions> - * \endcode - * at the start of your source file. - * - * This module defines the following MatrixBase methods. 
- * - \ref matrixbase_cos "MatrixBase::cos()", for computing the matrix cosine - * - \ref matrixbase_cosh "MatrixBase::cosh()", for computing the matrix hyperbolic cosine - * - \ref matrixbase_exp "MatrixBase::exp()", for computing the matrix exponential - * - \ref matrixbase_log "MatrixBase::log()", for computing the matrix logarithm - * - \ref matrixbase_pow "MatrixBase::pow()", for computing the matrix power - * - \ref matrixbase_matrixfunction "MatrixBase::matrixFunction()", for computing general matrix functions - * - \ref matrixbase_sin "MatrixBase::sin()", for computing the matrix sine - * - \ref matrixbase_sinh "MatrixBase::sinh()", for computing the matrix hyperbolic sine - * - \ref matrixbase_sqrt "MatrixBase::sqrt()", for computing the matrix square root - * - * These methods are the main entry points to this module. - * - * %Matrix functions are defined as follows. Suppose that \f$ f \f$ - * is an entire function (that is, a function on the complex plane - * that is everywhere complex differentiable). Then its Taylor - * series - * \f[ f(0) + f'(0) x + \frac{f''(0)}{2} x^2 + \frac{f'''(0)}{3!} x^3 + \cdots \f] - * converges to \f$ f(x) \f$. In this case, we can define the matrix - * function by the same series: - * \f[ f(M) = f(0) + f'(0) M + \frac{f''(0)}{2} M^2 + \frac{f'''(0)}{3!} M^3 + \cdots \f] - * - */ - -#include "src/MatrixFunctions/MatrixExponential.h" -#include "src/MatrixFunctions/MatrixFunction.h" -#include "src/MatrixFunctions/MatrixSquareRoot.h" -#include "src/MatrixFunctions/MatrixLogarithm.h" -#include "src/MatrixFunctions/MatrixPower.h" - - -/** -\page matrixbaseextra_page -\ingroup MatrixFunctions_Module - -\section matrixbaseextra MatrixBase methods defined in the MatrixFunctions module - -The remainder of the page documents the following MatrixBase methods -which are defined in the MatrixFunctions module. - - - -\subsection matrixbase_cos MatrixBase::cos() - -Compute the matrix cosine. - -\code -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cos() const -\endcode - -\param[in] M a square matrix. -\returns expression representing \f$ \cos(M) \f$. - -This function computes the matrix cosine. Use ArrayBase::cos() for computing the entry-wise cosine. - -The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cos(). - -\sa \ref matrixbase_sin "sin()" for an example. - - - -\subsection matrixbase_cosh MatrixBase::cosh() - -Compute the matrix hyberbolic cosine. - -\code -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cosh() const -\endcode - -\param[in] M a square matrix. -\returns expression representing \f$ \cosh(M) \f$ - -This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cosh(). - -\sa \ref matrixbase_sinh "sinh()" for an example. - - - -\subsection matrixbase_exp MatrixBase::exp() - -Compute the matrix exponential. - -\code -const MatrixExponentialReturnValue<Derived> MatrixBase<Derived>::exp() const -\endcode - -\param[in] M matrix whose exponential is to be computed. -\returns expression representing the matrix exponential of \p M. - -The matrix exponential of \f$ M \f$ is defined by -\f[ \exp(M) = \sum_{k=0}^\infty \frac{M^k}{k!}. \f] -The matrix exponential can be used to solve linear ordinary -differential equations: the solution of \f$ y' = My \f$ with the -initial condition \f$ y(0) = y_0 \f$ is given by -\f$ y(t) = \exp(M) y_0 \f$. - -The matrix exponential is different from applying the exp function to all the entries in the matrix. 
-Use ArrayBase::exp() if you want to do the latter. - -The cost of the computation is approximately \f$ 20 n^3 \f$ for -matrices of size \f$ n \f$. The number 20 depends weakly on the -norm of the matrix. - -The matrix exponential is computed using the scaling-and-squaring -method combined with Padé approximation. The matrix is first -rescaled, then the exponential of the reduced matrix is computed -approximant, and then the rescaling is undone by repeated -squaring. The degree of the Padé approximant is chosen such -that the approximation error is less than the round-off -error. However, errors may accumulate during the squaring phase. - -Details of the algorithm can be found in: Nicholas J. Higham, "The -scaling and squaring method for the matrix exponential revisited," -<em>SIAM J. %Matrix Anal. Applic.</em>, <b>26</b>:1179–1193, -2005. - -Example: The following program checks that -\f[ \exp \left[ \begin{array}{ccc} - 0 & \frac14\pi & 0 \\ - -\frac14\pi & 0 & 0 \\ - 0 & 0 & 0 - \end{array} \right] = \left[ \begin{array}{ccc} - \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ - \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ - 0 & 0 & 1 - \end{array} \right]. \f] -This corresponds to a rotation of \f$ \frac14\pi \f$ radians around -the z-axis. - -\include MatrixExponential.cpp -Output: \verbinclude MatrixExponential.out - -\note \p M has to be a matrix of \c float, \c double, `long double` -\c complex<float>, \c complex<double>, or `complex<long double>` . - - -\subsection matrixbase_log MatrixBase::log() - -Compute the matrix logarithm. - -\code -const MatrixLogarithmReturnValue<Derived> MatrixBase<Derived>::log() const -\endcode - -\param[in] M invertible matrix whose logarithm is to be computed. -\returns expression representing the matrix logarithm root of \p M. - -The matrix logarithm of \f$ M \f$ is a matrix \f$ X \f$ such that -\f$ \exp(X) = M \f$ where exp denotes the matrix exponential. As for -the scalar logarithm, the equation \f$ \exp(X) = M \f$ may have -multiple solutions; this function returns a matrix whose eigenvalues -have imaginary part in the interval \f$ (-\pi,\pi] \f$. - -The matrix logarithm is different from applying the log function to all the entries in the matrix. -Use ArrayBase::log() if you want to do the latter. - -In the real case, the matrix \f$ M \f$ should be invertible and -it should have no eigenvalues which are real and negative (pairs of -complex conjugate eigenvalues are allowed). In the complex case, it -only needs to be invertible. - -This function computes the matrix logarithm using the Schur-Parlett -algorithm as implemented by MatrixBase::matrixFunction(). The -logarithm of an atomic block is computed by MatrixLogarithmAtomic, -which uses direct computation for 1-by-1 and 2-by-2 blocks and an -inverse scaling-and-squaring algorithm for bigger blocks, with the -square roots computed by MatrixBase::sqrt(). - -Details of the algorithm can be found in Section 11.6.2 of: -Nicholas J. Higham, -<em>Functions of Matrices: Theory and Computation</em>, -SIAM 2008. ISBN 978-0-898716-46-7. - -Example: The following program checks that -\f[ \log \left[ \begin{array}{ccc} - \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ - \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ - 0 & 0 & 1 - \end{array} \right] = \left[ \begin{array}{ccc} - 0 & \frac14\pi & 0 \\ - -\frac14\pi & 0 & 0 \\ - 0 & 0 & 0 - \end{array} \right]. \f] -This corresponds to a rotation of \f$ \frac14\pi \f$ radians around -the z-axis. This is the inverse of the example used in the -documentation of \ref matrixbase_exp "exp()". 
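The rotation check described for exp() above (and inverted by log()) can be written out as a standalone sketch; since MatrixExponential.cpp and MatrixLogarithm.cpp are not part of this diff, the snippet below is an illustrative reconstruction rather than the shipped example.

\code
#include <unsupported/Eigen/MatrixFunctions>
#include <cmath>
#include <iostream>

int main()
{
  const double pi = std::acos(-1.0);

  Eigen::Matrix3d A;                // generator of a rotation by pi/4 about the z-axis
  A <<     0.0, pi / 4, 0.0,
      -pi / 4,    0.0,  0.0,
           0.0,    0.0,  0.0;

  Eigen::Matrix3d R = A.exp();      // the pi/4 rotation matrix
  std::cout << "exp(A) =\n" << R << "\n";
  std::cout << "log(exp(A)) =\n" << R.log() << "\n";   // recovers A
  return 0;
}
\endcode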
- -\include MatrixLogarithm.cpp -Output: \verbinclude MatrixLogarithm.out - -\note \p M has to be a matrix of \c float, \c double, `long -double`, \c complex<float>, \c complex<double>, or `complex<long double>`. - -\sa MatrixBase::exp(), MatrixBase::matrixFunction(), - class MatrixLogarithmAtomic, MatrixBase::sqrt(). - - -\subsection matrixbase_pow MatrixBase::pow() - -Compute the matrix raised to arbitrary real power. - -\code -const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(RealScalar p) const -\endcode - -\param[in] M base of the matrix power, should be a square matrix. -\param[in] p exponent of the matrix power. - -The matrix power \f$ M^p \f$ is defined as \f$ \exp(p \log(M)) \f$, -where exp denotes the matrix exponential, and log denotes the matrix -logarithm. This is different from raising all the entries in the matrix -to the p-th power. Use ArrayBase::pow() if you want to do the latter. - -If \p p is complex, the scalar type of \p M should be the type of \p -p . \f$ M^p \f$ simply evaluates into \f$ \exp(p \log(M)) \f$. -Therefore, the matrix \f$ M \f$ should meet the conditions to be an -argument of matrix logarithm. - -If \p p is real, it is casted into the real scalar type of \p M. Then -this function computes the matrix power using the Schur-Padé -algorithm as implemented by class MatrixPower. The exponent is split -into integral part and fractional part, where the fractional part is -in the interval \f$ (-1, 1) \f$. The main diagonal and the first -super-diagonal is directly computed. - -If \p M is singular with a semisimple zero eigenvalue and \p p is -positive, the Schur factor \f$ T \f$ is reordered with Givens -rotations, i.e. - -\f[ T = \left[ \begin{array}{cc} - T_1 & T_2 \\ - 0 & 0 - \end{array} \right] \f] - -where \f$ T_1 \f$ is invertible. Then \f$ T^p \f$ is given by - -\f[ T^p = \left[ \begin{array}{cc} - T_1^p & T_1^{-1} T_1^p T_2 \\ - 0 & 0 - \end{array}. \right] \f] - -\warning Fractional power of a matrix with a non-semisimple zero -eigenvalue is not well-defined. We introduce an assertion failure -against inaccurate result, e.g. \code -#include <unsupported/Eigen/MatrixFunctions> -#include <iostream> - -int main() -{ - Eigen::Matrix4d A; - A << 0, 0, 2, 3, - 0, 0, 4, 5, - 0, 0, 6, 7, - 0, 0, 8, 9; - std::cout << A.pow(0.37) << std::endl; - - // The 1 makes eigenvalue 0 non-semisimple. - A.coeffRef(0, 1) = 1; - - // This fails if EIGEN_NO_DEBUG is undefined. - std::cout << A.pow(0.37) << std::endl; - - return 0; -} -\endcode - -Details of the algorithm can be found in: Nicholas J. Higham and -Lijing Lin, "A Schur-Padé algorithm for fractional powers of a -matrix," <em>SIAM J. %Matrix Anal. Applic.</em>, -<b>32(3)</b>:1056–1078, 2011. - -Example: The following program checks that -\f[ \left[ \begin{array}{ccc} - \cos1 & -\sin1 & 0 \\ - \sin1 & \cos1 & 0 \\ - 0 & 0 & 1 - \end{array} \right]^{\frac14\pi} = \left[ \begin{array}{ccc} - \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ - \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ - 0 & 0 & 1 - \end{array} \right]. \f] -This corresponds to \f$ \frac14\pi \f$ rotations of 1 radian around -the z-axis. - -\include MatrixPower.cpp -Output: \verbinclude MatrixPower.out - -MatrixBase::pow() is user-friendly. However, there are some -circumstances under which you should use class MatrixPower directly. -MatrixPower can save the result of Schur decomposition, so it's -better for computing various powers for the same matrix. 
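A hedged sketch of the direct MatrixPower usage recommended above, reusing one Schur decomposition for several exponents; the interface shown mirrors the MatrixPower_optimal.cpp example referenced next, so treat names and values as illustrative.

\code
#include <unsupported/Eigen/MatrixFunctions>
#include <iostream>

int main()
{
  Eigen::Matrix4d A = Eigen::Matrix4d::Random();
  A = A * A.transpose() + Eigen::Matrix4d::Identity();   // keep the example well conditioned

  Eigen::MatrixPower<Eigen::Matrix4d> Apow(A);            // Schur decomposition happens once

  Eigen::Matrix4d Asqrt = Apow(0.5);                      // A^(1/2)
  Eigen::Matrix4d Acbrt = Apow(1.0 / 3.0);                // A^(1/3)
  Eigen::Matrix4d A32   = Apow(1.5);                      // A^(3/2)

  std::cout << Asqrt << "\n\n" << Acbrt << "\n\n" << A32 << std::endl;
  return 0;
}
\endcode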
- -Example: -\include MatrixPower_optimal.cpp -Output: \verbinclude MatrixPower_optimal.out - -\note \p M has to be a matrix of \c float, \c double, `long -double`, \c complex<float>, \c complex<double>, or -\c complex<long double> . - -\sa MatrixBase::exp(), MatrixBase::log(), class MatrixPower. - - -\subsection matrixbase_matrixfunction MatrixBase::matrixFunction() - -Compute a matrix function. - -\code -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::matrixFunction(typename internal::stem_function<typename internal::traits<Derived>::Scalar>::type f) const -\endcode - -\param[in] M argument of matrix function, should be a square matrix. -\param[in] f an entire function; \c f(x,n) should compute the n-th -derivative of f at x. -\returns expression representing \p f applied to \p M. - -Suppose that \p M is a matrix whose entries have type \c Scalar. -Then, the second argument, \p f, should be a function with prototype -\code -ComplexScalar f(ComplexScalar, int) -\endcode -where \c ComplexScalar = \c std::complex<Scalar> if \c Scalar is -real (e.g., \c float or \c double) and \c ComplexScalar = -\c Scalar if \c Scalar is complex. The return value of \c f(x,n) -should be \f$ f^{(n)}(x) \f$, the n-th derivative of f at x. - -This routine uses the algorithm described in: -Philip Davies and Nicholas J. Higham, -"A Schur-Parlett algorithm for computing matrix functions", -<em>SIAM J. %Matrix Anal. Applic.</em>, <b>25</b>:464–485, 2003. - -The actual work is done by the MatrixFunction class. - -Example: The following program checks that -\f[ \exp \left[ \begin{array}{ccc} - 0 & \frac14\pi & 0 \\ - -\frac14\pi & 0 & 0 \\ - 0 & 0 & 0 - \end{array} \right] = \left[ \begin{array}{ccc} - \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\ - \frac12\sqrt2 & \frac12\sqrt2 & 0 \\ - 0 & 0 & 1 - \end{array} \right]. \f] -This corresponds to a rotation of \f$ \frac14\pi \f$ radians around -the z-axis. This is the same example as used in the documentation -of \ref matrixbase_exp "exp()". - -\include MatrixFunction.cpp -Output: \verbinclude MatrixFunction.out - -Note that the function \c expfn is defined for complex numbers -\c x, even though the matrix \c A is over the reals. Instead of -\c expfn, we could also have used StdStemFunctions::exp: -\code -A.matrixFunction(StdStemFunctions<std::complex<double> >::exp, &B); -\endcode - - - -\subsection matrixbase_sin MatrixBase::sin() - -Compute the matrix sine. - -\code -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sin() const -\endcode - -\param[in] M a square matrix. -\returns expression representing \f$ \sin(M) \f$. - -This function computes the matrix sine. Use ArrayBase::sin() for computing the entry-wise sine. - -The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sin(). - -Example: \include MatrixSine.cpp -Output: \verbinclude MatrixSine.out - - - -\subsection matrixbase_sinh MatrixBase::sinh() - -Compute the matrix hyperbolic sine. - -\code -MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sinh() const -\endcode - -\param[in] M a square matrix. -\returns expression representing \f$ \sinh(M) \f$ - -This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sinh(). - -Example: \include MatrixSinh.cpp -Output: \verbinclude MatrixSinh.out - - -\subsection matrixbase_sqrt MatrixBase::sqrt() - -Compute the matrix square root. 
- -\code -const MatrixSquareRootReturnValue<Derived> MatrixBase<Derived>::sqrt() const -\endcode - -\param[in] M invertible matrix whose square root is to be computed. -\returns expression representing the matrix square root of \p M. - -The matrix square root of \f$ M \f$ is the matrix \f$ M^{1/2} \f$ -whose square is the original matrix; so if \f$ S = M^{1/2} \f$ then -\f$ S^2 = M \f$. This is different from taking the square root of all -the entries in the matrix; use ArrayBase::sqrt() if you want to do the -latter. - -In the <b>real case</b>, the matrix \f$ M \f$ should be invertible and -it should have no eigenvalues which are real and negative (pairs of -complex conjugate eigenvalues are allowed). In that case, the matrix -has a square root which is also real, and this is the square root -computed by this function. - -The matrix square root is computed by first reducing the matrix to -quasi-triangular form with the real Schur decomposition. The square -root of the quasi-triangular matrix can then be computed directly. The -cost is approximately \f$ 25 n^3 \f$ real flops for the real Schur -decomposition and \f$ 3\frac13 n^3 \f$ real flops for the remainder -(though the computation time in practice is likely more than this -indicates). - -Details of the algorithm can be found in: Nicholas J. Highan, -"Computing real square roots of a real matrix", <em>Linear Algebra -Appl.</em>, 88/89:405–430, 1987. - -If the matrix is <b>positive-definite symmetric</b>, then the square -root is also positive-definite symmetric. In this case, it is best to -use SelfAdjointEigenSolver::operatorSqrt() to compute it. - -In the <b>complex case</b>, the matrix \f$ M \f$ should be invertible; -this is a restriction of the algorithm. The square root computed by -this algorithm is the one whose eigenvalues have an argument in the -interval \f$ (-\frac12\pi, \frac12\pi] \f$. This is the usual branch -cut. - -The computation is the same as in the real case, except that the -complex Schur decomposition is used to reduce the matrix to a -triangular matrix. The theoretical cost is the same. Details are in: -Åke Björck and Sven Hammarling, "A Schur method for the -square root of a matrix", <em>Linear Algebra Appl.</em>, -52/53:127–140, 1983. - -Example: The following program checks that the square root of -\f[ \left[ \begin{array}{cc} - \cos(\frac13\pi) & -\sin(\frac13\pi) \\ - \sin(\frac13\pi) & \cos(\frac13\pi) - \end{array} \right], \f] -corresponding to a rotation over 60 degrees, is a rotation over 30 degrees: -\f[ \left[ \begin{array}{cc} - \cos(\frac16\pi) & -\sin(\frac16\pi) \\ - \sin(\frac16\pi) & \cos(\frac16\pi) - \end{array} \right]. \f] - -\include MatrixSquareRoot.cpp -Output: \verbinclude MatrixSquareRoot.out - -\sa class RealSchur, class ComplexSchur, class MatrixSquareRoot, - SelfAdjointEigenSolver::operatorSqrt(). - -*/ - -#endif // EIGEN_MATRIX_FUNCTIONS - diff --git a/eigen/unsupported/Eigen/MoreVectorization b/eigen/unsupported/Eigen/MoreVectorization deleted file mode 100644 index 470e724..0000000 --- a/eigen/unsupported/Eigen/MoreVectorization +++ /dev/null @@ -1,24 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
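The sqrt() example described above (a 60-degree rotation whose principal square root is the 30-degree rotation), written as a standalone sketch since MatrixSquareRoot.cpp is not part of this diff; values are illustrative.

\code
#include <unsupported/Eigen/MatrixFunctions>
#include <cmath>
#include <iostream>

int main()
{
  const double pi = std::acos(-1.0);

  Eigen::Matrix2d R60;
  R60 << std::cos(pi / 3), -std::sin(pi / 3),
         std::sin(pi / 3),  std::cos(pi / 3);

  Eigen::Matrix2d R30 = R60.sqrt();   // principal square root: the 30-degree rotation
  std::cout << R30 << std::endl;

  // For symmetric positive-definite input the text above recommends
  // SelfAdjointEigenSolver::operatorSqrt() instead.
  return 0;
}
\endcode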
- -#ifndef EIGEN_MOREVECTORIZATION_MODULE_H -#define EIGEN_MOREVECTORIZATION_MODULE_H - -#include <Eigen/Core> - -namespace Eigen { - -/** - * \defgroup MoreVectorization More vectorization module - */ - -} - -#include "src/MoreVectorization/MathFunctions.h" - -#endif // EIGEN_MOREVECTORIZATION_MODULE_H diff --git a/eigen/unsupported/Eigen/NonLinearOptimization b/eigen/unsupported/Eigen/NonLinearOptimization deleted file mode 100644 index 600ab4c..0000000 --- a/eigen/unsupported/Eigen/NonLinearOptimization +++ /dev/null @@ -1,134 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_NONLINEAROPTIMIZATION_MODULE -#define EIGEN_NONLINEAROPTIMIZATION_MODULE - -#include <vector> - -#include <Eigen/Core> -#include <Eigen/Jacobi> -#include <Eigen/QR> -#include <unsupported/Eigen/NumericalDiff> - -/** - * \defgroup NonLinearOptimization_Module Non linear optimization module - * - * \code - * #include <unsupported/Eigen/NonLinearOptimization> - * \endcode - * - * This module provides implementation of two important algorithms in non linear - * optimization. In both cases, we consider a system of non linear functions. Of - * course, this should work, and even work very well if those functions are - * actually linear. But if this is so, you should probably better use other - * methods more fitted to this special case. - * - * One algorithm allows to find an extremum of such a system (Levenberg - * Marquardt algorithm) and the second one is used to find - * a zero for the system (Powell hybrid "dogleg" method). - * - * This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK). - * Minpack is a very famous, old, robust and well-reknown package, written in - * fortran. Those implementations have been carefully tuned, tested, and used - * for several decades. - * - * The original fortran code was automatically translated using f2c (http://en.wikipedia.org/wiki/F2c) in C, - * then c++, and then cleaned by several different authors. - * The last one of those cleanings being our starting point : - * http://devernay.free.fr/hacks/cminpack.html - * - * Finally, we ported this code to Eigen, creating classes and API - * coherent with Eigen. When possible, we switched to Eigen - * implementation, such as most linear algebra (vectors, matrices, stable norms). - * - * Doing so, we were very careful to check the tests we setup at the very - * beginning, which ensure that the same results are found. - * - * \section Tests Tests - * - * The tests are placed in the file unsupported/test/NonLinear.cpp. - * - * There are two kinds of tests : those that come from examples bundled with cminpack. - * They guaranty we get the same results as the original algorithms (value for 'x', - * for the number of evaluations of the function, and for the number of evaluations - * of the jacobian if ever). - * - * Other tests were added by myself at the very beginning of the - * process and check the results for levenberg-marquardt using the reference data - * on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've - * carefully checked that the same results were obtained when modifiying the - * code. 
Please note that we do not always get the exact same decimals as they do, - * but this is ok : they use 128bits float, and we do the tests using the C type 'double', - * which is 64 bits on most platforms (x86 and amd64, at least). - * I've performed those tests on several other implementations of levenberg-marquardt, and - * (c)minpack performs VERY well compared to those, both in accuracy and speed. - * - * The documentation for running the tests is on the wiki - * http://eigen.tuxfamily.org/index.php?title=Tests - * - * \section API API : overview of methods - * - * Both algorithms can use either the jacobian (provided by the user) or compute - * an approximation by themselves (actually using Eigen \ref NumericalDiff_Module). - * The part of API referring to the latter use 'NumericalDiff' in the method names - * (exemple: LevenbergMarquardt.minimizeNumericalDiff() ) - * - * The methods LevenbergMarquardt.lmder1()/lmdif1()/lmstr1() and - * HybridNonLinearSolver.hybrj1()/hybrd1() are specific methods from the original - * minpack package that you probably should NOT use until you are porting a code that - * was previously using minpack. They just define a 'simple' API with default values - * for some parameters. - * - * All algorithms are provided using Two APIs : - * - one where the user inits the algorithm, and uses '*OneStep()' as much as he wants : - * this way the caller have control over the steps - * - one where the user just calls a method (optimize() or solve()) which will - * handle the loop: init + loop until a stop condition is met. Those are provided for - * convenience. - * - * As an example, the method LevenbergMarquardt::minimize() is - * implemented as follow : - * \code - * Status LevenbergMarquardt<FunctorType,Scalar>::minimize(FVectorType &x, const int mode) - * { - * Status status = minimizeInit(x, mode); - * do { - * status = minimizeOneStep(x, mode); - * } while (status==Running); - * return status; - * } - * \endcode - * - * \section examples Examples - * - * The easiest way to understand how to use this module is by looking at the many examples in the file - * unsupported/test/NonLinearOptimization.cpp. - */ - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#include "src/NonLinearOptimization/qrsolv.h" -#include "src/NonLinearOptimization/r1updt.h" -#include "src/NonLinearOptimization/r1mpyq.h" -#include "src/NonLinearOptimization/rwupdt.h" -#include "src/NonLinearOptimization/fdjac1.h" -#include "src/NonLinearOptimization/lmpar.h" -#include "src/NonLinearOptimization/dogleg.h" -#include "src/NonLinearOptimization/covar.h" - -#include "src/NonLinearOptimization/chkder.h" - -#endif - -#include "src/NonLinearOptimization/HybridNonLinearSolver.h" -#include "src/NonLinearOptimization/LevenbergMarquardt.h" - - -#endif // EIGEN_NONLINEAROPTIMIZATION_MODULE diff --git a/eigen/unsupported/Eigen/NumericalDiff b/eigen/unsupported/Eigen/NumericalDiff deleted file mode 100644 index 433334c..0000000 --- a/eigen/unsupported/Eigen/NumericalDiff +++ /dev/null @@ -1,56 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
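A hedged least-squares sketch for the Levenberg-Marquardt driver described above, fitting y = a*exp(b*t) to synthetic data. The functor layout (Scalar typedefs, inputs(), values(), operator(), df()) follows the pattern used by the tests in unsupported/test/NonLinearOptimization.cpp that the text points to; names and data are illustrative, not normative.

\code
#include <unsupported/Eigen/NonLinearOptimization>
#include <Eigen/Core>
#include <cmath>
#include <iostream>

struct ExpFunctor
{
  typedef double Scalar;
  enum { InputsAtCompileTime = 2, ValuesAtCompileTime = Eigen::Dynamic };
  typedef Eigen::VectorXd InputType;
  typedef Eigen::VectorXd ValueType;
  typedef Eigen::MatrixXd JacobianType;

  Eigen::VectorXd t, y;                               // data points
  int inputs() const { return 2; }
  int values() const { return int(t.size()); }

  // residuals r_i = a*exp(b*t_i) - y_i, with x = (a, b)
  int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const
  {
    for (int i = 0; i < values(); ++i)
      fvec(i) = x(0) * std::exp(x(1) * t(i)) - y(i);
    return 0;
  }

  // analytic Jacobian of the residuals
  int df(const Eigen::VectorXd &x, Eigen::MatrixXd &fjac) const
  {
    for (int i = 0; i < values(); ++i)
    {
      fjac(i, 0) = std::exp(x(1) * t(i));
      fjac(i, 1) = x(0) * t(i) * std::exp(x(1) * t(i));
    }
    return 0;
  }
};

int main()
{
  ExpFunctor f;
  f.t = Eigen::VectorXd::LinSpaced(20, 0.0, 1.0);
  f.y = (2.0 * (1.5 * f.t.array()).exp()).matrix();   // synthetic data for a=2, b=1.5

  Eigen::VectorXd x(2);
  x << 1.0, 1.0;                                      // initial guess
  Eigen::LevenbergMarquardt<ExpFunctor> lm(f);
  lm.minimize(x);                                     // init + iterate until a stop condition
  std::cout << "a = " << x(0) << ", b = " << x(1) << std::endl;
  return 0;
}
\endcode

If no analytic df() is available, the same functor (its typedefs are there for this purpose) can instead be wrapped in Eigen::NumericalDiff<> from the module whose header follows, as the documentation of both modules suggests.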
- -#ifndef EIGEN_NUMERICALDIFF_MODULE -#define EIGEN_NUMERICALDIFF_MODULE - -#include <Eigen/Core> - -namespace Eigen { - -/** - * \defgroup NumericalDiff_Module Numerical differentiation module - * - * \code - * #include <unsupported/Eigen/NumericalDiff> - * \endcode - * - * See http://en.wikipedia.org/wiki/Numerical_differentiation - * - * Warning : this should NOT be confused with automatic differentiation, which - * is a different method and has its own module in Eigen : \ref - * AutoDiff_Module. - * - * Currently only "Forward" and "Central" schemes are implemented. Those - * are basic methods, and there exist some more elaborated way of - * computing such approximates. They are implemented using both - * proprietary and free software, and usually requires linking to an - * external library. It is very easy for you to write a functor - * using such software, and the purpose is quite orthogonal to what we - * want to achieve with Eigen. - * - * This is why we will not provide wrappers for every great numerical - * differentiation software that exist, but should rather stick with those - * basic ones, that still are useful for testing. - * - * Also, the \ref NonLinearOptimization_Module needs this in order to - * provide full features compatibility with the original (c)minpack - * package. - * - */ -} - -//@{ - -#include "src/NumericalDiff/NumericalDiff.h" - -//@} - - -#endif // EIGEN_NUMERICALDIFF_MODULE diff --git a/eigen/unsupported/Eigen/OpenGLSupport b/eigen/unsupported/Eigen/OpenGLSupport deleted file mode 100644 index 085325c..0000000 --- a/eigen/unsupported/Eigen/OpenGLSupport +++ /dev/null @@ -1,322 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_OPENGL_MODULE -#define EIGEN_OPENGL_MODULE - -#include <Eigen/Geometry> - -#if defined(__APPLE_CC__) - #include <OpenGL/gl.h> -#else - #include <GL/gl.h> -#endif - -namespace Eigen { - -/** - * \defgroup OpenGLSUpport_Module OpenGL Support module - * - * This module provides wrapper functions for a couple of OpenGL functions - * which simplify the way to pass Eigen's object to openGL. - * Here is an exmaple: - * - * \code - * // You need to add path_to_eigen/unsupported to your include path. - * #include <Eigen/OpenGLSupport> - * // ... - * Vector3f x, y; - * Matrix3f rot; - * - * glVertex(y + x * rot); - * - * Quaternion q; - * glRotate(q); - * - * // ... 
- * \endcode - * - */ -//@{ - -#define EIGEN_GL_FUNC_DECLARATION(FUNC) \ -namespace internal { \ - template< typename XprType, \ - typename Scalar = typename XprType::Scalar, \ - int Rows = XprType::RowsAtCompileTime, \ - int Cols = XprType::ColsAtCompileTime, \ - bool IsGLCompatible = bool(internal::evaluator<XprType>::Flags&LinearAccessBit) \ - && bool(XprType::Flags&DirectAccessBit) \ - && (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \ - struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \ - \ - template<typename XprType, typename Scalar, int Rows, int Cols> \ - struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType,Scalar,Rows,Cols,false> { \ - inline static void run(const XprType& p) { \ - EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<typename plain_matrix_type_column_major<XprType>::type>::run(p); } \ - }; \ -} \ - \ -template<typename Derived> inline void FUNC(const Eigen::DenseBase<Derived>& p) { \ - EIGEN_CAT(EIGEN_CAT(internal::gl_,FUNC),_impl)<Derived>::run(p.derived()); \ -} - - -#define EIGEN_GL_FUNC_SPECIALIZATION_MAT(FUNC,SCALAR,ROWS,COLS,SUFFIX) \ -namespace internal { \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, ROWS, COLS, true> { \ - inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ - }; \ -} - - -#define EIGEN_GL_FUNC_SPECIALIZATION_VEC(FUNC,SCALAR,SIZE,SUFFIX) \ -namespace internal { \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, SIZE, 1, true> { \ - inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ - }; \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, 1, SIZE, true> { \ - inline static void run(const XprType& p) { FUNC##SUFFIX(p.data()); } \ - }; \ -} - - -EIGEN_GL_FUNC_DECLARATION (glVertex) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 2,2iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 2,2sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 2,2fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 2,2dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 3,3iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 3,3sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 3,3dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,int, 4,4iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,short, 4,4sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,float, 4,4fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glVertex,double, 4,4dv) - -EIGEN_GL_FUNC_DECLARATION (glTexCoord) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 2,2iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 2,2sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 2,2fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 2,2dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 3,3iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 3,3sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 3,3dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,int, 4,4iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,short, 4,4sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,float, 4,4fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTexCoord,double, 4,4dv) - -EIGEN_GL_FUNC_DECLARATION (glColor) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 2,2iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 2,2sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 2,2fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 2,2dv) 
-EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 3,3iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 3,3sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 3,3dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,int, 4,4iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,short, 4,4sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,float, 4,4fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glColor,double, 4,4dv) - -EIGEN_GL_FUNC_DECLARATION (glNormal) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,int, 3,3iv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,short, 3,3sv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glNormal,double, 3,3dv) - -inline void glScale2fv(const float* v) { glScalef(v[0], v[1], 1.f); } -inline void glScale2dv(const double* v) { glScaled(v[0], v[1], 1.0); } -inline void glScale3fv(const float* v) { glScalef(v[0], v[1], v[2]); } -inline void glScale3dv(const double* v) { glScaled(v[0], v[1], v[2]); } - -EIGEN_GL_FUNC_DECLARATION (glScale) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,float, 2,2fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,double, 2,2dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glScale,double, 3,3dv) - -template<typename Scalar> void glScale(const UniformScaling<Scalar>& s) { glScale(Matrix<Scalar,3,1>::Constant(s.factor())); } - -inline void glTranslate2fv(const float* v) { glTranslatef(v[0], v[1], 0.f); } -inline void glTranslate2dv(const double* v) { glTranslated(v[0], v[1], 0.0); } -inline void glTranslate3fv(const float* v) { glTranslatef(v[0], v[1], v[2]); } -inline void glTranslate3dv(const double* v) { glTranslated(v[0], v[1], v[2]); } - -EIGEN_GL_FUNC_DECLARATION (glTranslate) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,float, 2,2fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,double, 2,2dv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,float, 3,3fv) -EIGEN_GL_FUNC_SPECIALIZATION_VEC(glTranslate,double, 3,3dv) - -template<typename Scalar> void glTranslate(const Translation<Scalar,2>& t) { glTranslate(t.vector()); } -template<typename Scalar> void glTranslate(const Translation<Scalar,3>& t) { glTranslate(t.vector()); } - -EIGEN_GL_FUNC_DECLARATION (glMultMatrix) -EIGEN_GL_FUNC_SPECIALIZATION_MAT(glMultMatrix,float, 4,4,f) -EIGEN_GL_FUNC_SPECIALIZATION_MAT(glMultMatrix,double, 4,4,d) - -template<typename Scalar> void glMultMatrix(const Transform<Scalar,3,Affine>& t) { glMultMatrix(t.matrix()); } -template<typename Scalar> void glMultMatrix(const Transform<Scalar,3,Projective>& t) { glMultMatrix(t.matrix()); } -template<typename Scalar> void glMultMatrix(const Transform<Scalar,3,AffineCompact>& t) { glMultMatrix(Transform<Scalar,3,Affine>(t).matrix()); } - -EIGEN_GL_FUNC_DECLARATION (glLoadMatrix) -EIGEN_GL_FUNC_SPECIALIZATION_MAT(glLoadMatrix,float, 4,4,f) -EIGEN_GL_FUNC_SPECIALIZATION_MAT(glLoadMatrix,double, 4,4,d) - -template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,Affine>& t) { glLoadMatrix(t.matrix()); } -template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,Projective>& t) { glLoadMatrix(t.matrix()); } -template<typename Scalar> void glLoadMatrix(const Transform<Scalar,3,AffineCompact>& t) { glLoadMatrix(Transform<Scalar,3,Affine>(t).matrix()); } - -inline void glRotate(const Rotation2D<float>& rot) -{ - glRotatef(rot.angle()*180.f/float(EIGEN_PI), 0.f, 0.f, 1.f); -} -inline void glRotate(const Rotation2D<double>& rot) -{ - glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0); -} - 
-template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot) -{ - Transform<typename Derived::Scalar,3,Projective> tr(rot); - glMultMatrix(tr.matrix()); -} - -#define EIGEN_GL_MAKE_CONST_const const -#define EIGEN_GL_MAKE_CONST__ -#define EIGEN_GL_EVAL(X) X - -#define EIGEN_GL_FUNC1_DECLARATION(FUNC,ARG1,CONST) \ -namespace internal { \ - template< typename XprType, \ - typename Scalar = typename XprType::Scalar, \ - int Rows = XprType::RowsAtCompileTime, \ - int Cols = XprType::ColsAtCompileTime, \ - bool IsGLCompatible = bool(internal::evaluator<XprType>::Flags&LinearAccessBit) \ - && bool(XprType::Flags&DirectAccessBit) \ - && (XprType::IsVectorAtCompileTime || (XprType::Flags&RowMajorBit)==0)> \ - struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl); \ - \ - template<typename XprType, typename Scalar, int Rows, int Cols> \ - struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType,Scalar,Rows,Cols,false> { \ - inline static void run(ARG1 a,EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { \ - EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<typename plain_matrix_type_column_major<XprType>::type>::run(a,p); } \ - }; \ -} \ - \ -template<typename Derived> inline void FUNC(ARG1 a,EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) Eigen::DenseBase<Derived>& p) { \ - EIGEN_CAT(EIGEN_CAT(internal::gl_,FUNC),_impl)<Derived>::run(a,p.derived()); \ -} - - -#define EIGEN_GL_FUNC1_SPECIALIZATION_MAT(FUNC,ARG1,CONST,SCALAR,ROWS,COLS,SUFFIX) \ -namespace internal { \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, ROWS, COLS, true> { \ - inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ - }; \ -} - - -#define EIGEN_GL_FUNC1_SPECIALIZATION_VEC(FUNC,ARG1,CONST,SCALAR,SIZE,SUFFIX) \ -namespace internal { \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, SIZE, 1, true> { \ - inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ - }; \ - template< typename XprType> struct EIGEN_CAT(EIGEN_CAT(gl_,FUNC),_impl)<XprType, SCALAR, 1, SIZE, true> { \ - inline static void run(ARG1 a, EIGEN_GL_EVAL(EIGEN_GL_MAKE_CONST_##CONST) XprType& p) { FUNC##SUFFIX(a,p.data()); } \ - }; \ -} - -EIGEN_GL_FUNC1_DECLARATION (glGet,GLenum,_) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glGet,GLenum,_,float, 4,4,Floatv) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glGet,GLenum,_,double, 4,4,Doublev) - -// glUniform API - -#ifdef GL_VERSION_2_0 - -inline void glUniform2fv_ei (GLint loc, const float* v) { glUniform2fv(loc,1,v); } -inline void glUniform2iv_ei (GLint loc, const int* v) { glUniform2iv(loc,1,v); } - -inline void glUniform3fv_ei (GLint loc, const float* v) { glUniform3fv(loc,1,v); } -inline void glUniform3iv_ei (GLint loc, const int* v) { glUniform3iv(loc,1,v); } - -inline void glUniform4fv_ei (GLint loc, const float* v) { glUniform4fv(loc,1,v); } -inline void glUniform4iv_ei (GLint loc, const int* v) { glUniform4iv(loc,1,v); } - -inline void glUniformMatrix2fv_ei (GLint loc, const float* v) { glUniformMatrix2fv(loc,1,false,v); } -inline void glUniformMatrix3fv_ei (GLint loc, const float* v) { glUniformMatrix3fv(loc,1,false,v); } -inline void glUniformMatrix4fv_ei (GLint loc, const float* v) { glUniformMatrix4fv(loc,1,false,v); } - - -EIGEN_GL_FUNC1_DECLARATION (glUniform,GLint,const) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 2,2fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 2,2iv_ei) 
-EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 3,3fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 3,3iv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,float, 4,4fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,int, 4,4iv_ei) - -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,2,Matrix2fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,3,Matrix3fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,4,Matrix4fv_ei) - -#endif - -#ifdef GL_VERSION_2_1 - -inline void glUniformMatrix2x3fv_ei(GLint loc, const float* v) { glUniformMatrix2x3fv(loc,1,false,v); } -inline void glUniformMatrix3x2fv_ei(GLint loc, const float* v) { glUniformMatrix3x2fv(loc,1,false,v); } -inline void glUniformMatrix2x4fv_ei(GLint loc, const float* v) { glUniformMatrix2x4fv(loc,1,false,v); } -inline void glUniformMatrix4x2fv_ei(GLint loc, const float* v) { glUniformMatrix4x2fv(loc,1,false,v); } -inline void glUniformMatrix3x4fv_ei(GLint loc, const float* v) { glUniformMatrix3x4fv(loc,1,false,v); } -inline void glUniformMatrix4x3fv_ei(GLint loc, const float* v) { glUniformMatrix4x3fv(loc,1,false,v); } - -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,3,Matrix2x3fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,2,Matrix3x2fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 2,4,Matrix2x4fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,2,Matrix4x2fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 3,4,Matrix3x4fv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_MAT(glUniform,GLint,const,float, 4,3,Matrix4x3fv_ei) - -#endif - -#ifdef GL_VERSION_3_0 - -inline void glUniform2uiv_ei (GLint loc, const unsigned int* v) { glUniform2uiv(loc,1,v); } -inline void glUniform3uiv_ei (GLint loc, const unsigned int* v) { glUniform3uiv(loc,1,v); } -inline void glUniform4uiv_ei (GLint loc, const unsigned int* v) { glUniform4uiv(loc,1,v); } - -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 2,2uiv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 3,3uiv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,unsigned int, 4,4uiv_ei) - -#endif - -#ifdef GL_ARB_gpu_shader_fp64 -inline void glUniform2dv_ei (GLint loc, const double* v) { glUniform2dv(loc,1,v); } -inline void glUniform3dv_ei (GLint loc, const double* v) { glUniform3dv(loc,1,v); } -inline void glUniform4dv_ei (GLint loc, const double* v) { glUniform4dv(loc,1,v); } - -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 2,2dv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 3,3dv_ei) -EIGEN_GL_FUNC1_SPECIALIZATION_VEC(glUniform,GLint,const,double, 4,4dv_ei) -#endif - - -//@} - -} - -#endif // EIGEN_OPENGL_MODULE diff --git a/eigen/unsupported/Eigen/Polynomials b/eigen/unsupported/Eigen/Polynomials deleted file mode 100644 index cece563..0000000 --- a/eigen/unsupported/Eigen/Polynomials +++ /dev/null @@ -1,138 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
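Stepping back to the OpenGLSupport wrappers above: with a GL >= 2.0 context and a linked program bound, the glUniform specializations let Eigen objects be passed straight through. A hedged sketch; the uniform locations are whatever glGetUniformLocation returned for your shader and are purely illustrative here.

\code
#include <Eigen/OpenGLSupport>   // unsupported/ must be on the include path

void setUniforms(GLint colorLoc, GLint mvpLoc)
{
  Eigen::Vector4f color(1.f, 0.5f, 0.25f, 1.f);
  Eigen::Matrix4f mvp = Eigen::Matrix4f::Identity();

  Eigen::glUniform(colorLoc, color);   // dispatches to glUniform4fv
  Eigen::glUniform(mvpLoc, mvp);       // dispatches to glUniformMatrix4fv
}
\endcode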
- -#ifndef EIGEN_POLYNOMIALS_MODULE_H -#define EIGEN_POLYNOMIALS_MODULE_H - -#include <Eigen/Core> - -#include <Eigen/src/Core/util/DisableStupidWarnings.h> - -#include <Eigen/Eigenvalues> - -// Note that EIGEN_HIDE_HEAVY_CODE has to be defined per module -#if (defined EIGEN_EXTERN_INSTANTIATIONS) && (EIGEN_EXTERN_INSTANTIATIONS>=2) - #ifndef EIGEN_HIDE_HEAVY_CODE - #define EIGEN_HIDE_HEAVY_CODE - #endif -#elif defined EIGEN_HIDE_HEAVY_CODE - #undef EIGEN_HIDE_HEAVY_CODE -#endif - -/** - * \defgroup Polynomials_Module Polynomials module - * \brief This module provides a QR based polynomial solver. - * - * To use this module, add - * \code - * #include <unsupported/Eigen/Polynomials> - * \endcode - * at the start of your source file. - */ - -#include "src/Polynomials/PolynomialUtils.h" -#include "src/Polynomials/Companion.h" -#include "src/Polynomials/PolynomialSolver.h" - -/** - \page polynomials Polynomials defines functions for dealing with polynomials - and a QR based polynomial solver. - \ingroup Polynomials_Module - - The remainder of the page documents first the functions for evaluating, computing - polynomials, computing estimates about polynomials and next the QR based polynomial - solver. - - \section polynomialUtils convenient functions to deal with polynomials - \subsection roots_to_monicPolynomial - The function - \code - void roots_to_monicPolynomial( const RootVector& rv, Polynomial& poly ) - \endcode - computes the coefficients \f$ a_i \f$ of - - \f$ p(x) = a_0 + a_{1}x + ... + a_{n-1}x^{n-1} + x^n \f$ - - where \f$ p \f$ is known through its roots i.e. \f$ p(x) = (x-r_1)(x-r_2)...(x-r_n) \f$. - - \subsection poly_eval - The function - \code - T poly_eval( const Polynomials& poly, const T& x ) - \endcode - evaluates a polynomial at a given point using stabilized Hörner method. - - The following code: first computes the coefficients in the monomial basis of the monic polynomial that has the provided roots; - then, it evaluates the computed polynomial, using a stabilized Hörner method. - - \include PolynomialUtils1.cpp - Output: \verbinclude PolynomialUtils1.out - - \subsection Cauchy bounds - The function - \code - Real cauchy_max_bound( const Polynomial& poly ) - \endcode - provides a maximum bound (the Cauchy one: \f$C(p)\f$) for the absolute value of a root of the given polynomial i.e. - \f$ \forall r_i \f$ root of \f$ p(x) = \sum_{k=0}^d a_k x^k \f$, - \f$ |r_i| \le C(p) = \sum_{k=0}^{d} \left | \frac{a_k}{a_d} \right | \f$ - The leading coefficient \f$ p \f$: should be non zero \f$a_d \neq 0\f$. - - - The function - \code - Real cauchy_min_bound( const Polynomial& poly ) - \endcode - provides a minimum bound (the Cauchy one: \f$c(p)\f$) for the absolute value of a non zero root of the given polynomial i.e. - \f$ \forall r_i \neq 0 \f$ root of \f$ p(x) = \sum_{k=0}^d a_k x^k \f$, - \f$ |r_i| \ge c(p) = \left( \sum_{k=0}^{d} \left | \frac{a_k}{a_0} \right | \right)^{-1} \f$ - - - - - \section QR polynomial solver class - Computes the complex roots of a polynomial by computing the eigenvalues of the associated companion matrix with the QR algorithm. - - The roots of \f$ p(x) = a_0 + a_1 x + a_2 x^2 + a_{3} x^3 + x^4 \f$ are the eigenvalues of - \f$ - \left [ - \begin{array}{cccc} - 0 & 0 & 0 & a_0 \\ - 1 & 0 & 0 & a_1 \\ - 0 & 1 & 0 & a_2 \\ - 0 & 0 & 1 & a_3 - \end{array} \right ] - \f$ - - However, the QR algorithm is not guaranteed to converge when there are several eigenvalues with same modulus. 
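To make the utilities above concrete, here is a small sketch; the roots and the evaluation point are arbitrary, and the header path is the one advertised at the top of this module.

\code
#include <unsupported/Eigen/Polynomials>
#include <iostream>

int main()
{
  // Coefficients of the monic polynomial p(x) = (x-1)(x-2)(x-3),
  // stored as a_0, a_1, a_2, a_3 with a_3 == 1.
  Eigen::Vector3d roots(1.0, 2.0, 3.0);
  Eigen::VectorXd coeffs;
  Eigen::roots_to_monicPolynomial(roots, coeffs);

  // Stabilized Horner evaluation: p(4) == 3*2*1 == 6.
  std::cout << Eigen::poly_eval(coeffs, 4.0) << "\n";

  // Cauchy upper bound on the modulus of any root (leading coefficient is 1 here).
  std::cout << Eigen::cauchy_max_bound(coeffs) << "\n";

  // Complex roots via the QR-based companion-matrix solver.
  Eigen::PolynomialSolver<double, 3> solver(coeffs);
  std::cout << solver.roots().transpose() << "\n";
  return 0;
}
\endcode

The solver in the last two lines is the QR companion-matrix approach this section describes; its second template parameter is the compile-time degree of the polynomial.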
- - Therefore the current polynomial solver is guaranteed to provide a correct result only when the complex roots \f$r_1,r_2,...,r_d\f$ have distinct moduli i.e. - - \f$ \forall i,j \in [1;d],~ \| r_i \| \neq \| r_j \| \f$. - - With 32bit (float) floating types this problem shows up frequently. - However, almost always, correct accuracy is reached even in these cases for 64bit - (double) floating types and small polynomial degree (<20). - - \include PolynomialSolver1.cpp - - In the above example: - - -# a simple use of the polynomial solver is shown; - -# the accuracy problem with the QR algorithm is presented: a polynomial with almost conjugate roots is provided to the solver. - Those roots have almost same module therefore the QR algorithm failed to converge: the accuracy - of the last root is bad; - -# a simple way to circumvent the problem is shown: use doubles instead of floats. - - Output: \verbinclude PolynomialSolver1.out -*/ - -#include <Eigen/src/Core/util/ReenableStupidWarnings.h> - -#endif // EIGEN_POLYNOMIALS_MODULE_H -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/eigen/unsupported/Eigen/Skyline b/eigen/unsupported/Eigen/Skyline deleted file mode 100644 index 71a68cb..0000000 --- a/eigen/unsupported/Eigen/Skyline +++ /dev/null @@ -1,39 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SKYLINE_MODULE_H -#define EIGEN_SKYLINE_MODULE_H - - -#include "Eigen/Core" - -#include "Eigen/src/Core/util/DisableStupidWarnings.h" - -#include <map> -#include <cstdlib> -#include <cstring> -#include <algorithm> - -/** - * \defgroup Skyline_Module Skyline module - * - * - * - * - */ - -#include "src/Skyline/SkylineUtil.h" -#include "src/Skyline/SkylineMatrixBase.h" -#include "src/Skyline/SkylineStorage.h" -#include "src/Skyline/SkylineMatrix.h" -#include "src/Skyline/SkylineInplaceLU.h" -#include "src/Skyline/SkylineProduct.h" - -#include "Eigen/src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SKYLINE_MODULE_H diff --git a/eigen/unsupported/Eigen/SparseExtra b/eigen/unsupported/Eigen/SparseExtra deleted file mode 100644 index 819cffa..0000000 --- a/eigen/unsupported/Eigen/SparseExtra +++ /dev/null @@ -1,53 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_EXTRA_MODULE_H -#define EIGEN_SPARSE_EXTRA_MODULE_H - -#include "../../Eigen/Sparse" - -#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" - -#include <vector> -#include <map> -#include <cstdlib> -#include <cstring> -#include <algorithm> -#include <fstream> -#include <sstream> - -#ifdef EIGEN_GOOGLEHASH_SUPPORT - #include <google/dense_hash_map> -#endif - -/** - * \defgroup SparseExtra_Module SparseExtra module - * - * This module contains some experimental features extending the sparse module. 
- * - * \code - * #include <Eigen/SparseExtra> - * \endcode - */ - - -#include "src/SparseExtra/DynamicSparseMatrix.h" -#include "src/SparseExtra/BlockOfDynamicSparseMatrix.h" -#include "src/SparseExtra/RandomSetter.h" - -#include "src/SparseExtra/MarketIO.h" - -#if !defined(_WIN32) -#include <dirent.h> -#include "src/SparseExtra/MatrixMarketIterator.h" -#endif - -#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SPARSE_EXTRA_MODULE_H diff --git a/eigen/unsupported/Eigen/SpecialFunctions b/eigen/unsupported/Eigen/SpecialFunctions deleted file mode 100644 index a2ad492..0000000 --- a/eigen/unsupported/Eigen/SpecialFunctions +++ /dev/null @@ -1,63 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Gael Guennebaud <g.gael@free.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPECIALFUNCTIONS_MODULE -#define EIGEN_SPECIALFUNCTIONS_MODULE - -#include <math.h> - -#include "../../Eigen/Core" - -#include "../../Eigen/src/Core/util/DisableStupidWarnings.h" - -namespace Eigen { - -/** - * \defgroup SpecialFunctions_Module Special math functions module - * - * This module features additional coefficient-wise math functions available - * within the numext:: namespace for the scalar version, and as method and/or free - * functions of Array. Those include: - * - * - erf - * - erfc - * - lgamma - * - igamma - * - igammac - * - digamma - * - polygamma - * - zeta - * - betainc - * - * \code - * #include <unsupported/Eigen/SpecialFunctions> - * \endcode - */ -//@{ - -} - -#include "src/SpecialFunctions/SpecialFunctionsImpl.h" -#include "src/SpecialFunctions/SpecialFunctionsPacketMath.h" -#include "src/SpecialFunctions/SpecialFunctionsHalf.h" -#include "src/SpecialFunctions/SpecialFunctionsFunctors.h" -#include "src/SpecialFunctions/SpecialFunctionsArrayAPI.h" - -#if defined EIGEN_VECTORIZE_CUDA - #include "src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h" -#endif - -namespace Eigen { -//@} -} - - -#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SPECIALFUNCTIONS_MODULE diff --git a/eigen/unsupported/Eigen/Splines b/eigen/unsupported/Eigen/Splines deleted file mode 100644 index 322e6b9..0000000 --- a/eigen/unsupported/Eigen/Splines +++ /dev/null @@ -1,31 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 20010-2011 Hauke Heibel <hauke.heibel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPLINES_MODULE_H -#define EIGEN_SPLINES_MODULE_H - -namespace Eigen -{ -/** - * \defgroup Splines_Module Spline and spline fitting module - * - * This module provides a simple multi-dimensional spline class while - * offering most basic functionality to fit a spline to point sets. 
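As a rough sketch of that fitting workflow (hypothetical sample points, degree chosen arbitrarily), assuming the <unsupported/Eigen/Splines> header mentioned just below:

\code
#include <unsupported/Eigen/Splines>

typedef Eigen::Spline<double, 2> Spline2d;

void fit_example()
{
  // Five 2D sample points, one point per column.
  Eigen::MatrixXd points(2, 5);
  points << 0.0, 1.0, 2.0, 3.0, 4.0,
            0.0, 1.0, 0.0, 1.0, 0.0;

  // Interpolating spline of degree 3 through the points.
  Spline2d spline = Eigen::SplineFitting<Spline2d>::Interpolate(points, 3);

  // Evaluate in the middle of the [0,1] parameter range.
  Spline2d::PointType p = spline(0.5);
  (void)p;
}
\endcode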
- * - * \code - * #include <unsupported/Eigen/Splines> - * \endcode - */ -} - -#include "src/Splines/SplineFwd.h" -#include "src/Splines/Spline.h" -#include "src/Splines/SplineFitting.h" - -#endif // EIGEN_SPLINES_MODULE_H diff --git a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h deleted file mode 100644 index 33b6c39..0000000 --- a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ /dev/null @@ -1,108 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_AUTODIFF_JACOBIAN_H -#define EIGEN_AUTODIFF_JACOBIAN_H - -namespace Eigen -{ - -template<typename Functor> class AutoDiffJacobian : public Functor -{ -public: - AutoDiffJacobian() : Functor() {} - AutoDiffJacobian(const Functor& f) : Functor(f) {} - - // forward constructors -#if EIGEN_HAS_VARIADIC_TEMPLATES - template<typename... T> - AutoDiffJacobian(const T& ...Values) : Functor(Values...) {} -#else - template<typename T0> - AutoDiffJacobian(const T0& a0) : Functor(a0) {} - template<typename T0, typename T1> - AutoDiffJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} - template<typename T0, typename T1, typename T2> - AutoDiffJacobian(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2) {} -#endif - - typedef typename Functor::InputType InputType; - typedef typename Functor::ValueType ValueType; - typedef typename ValueType::Scalar Scalar; - - enum { - InputsAtCompileTime = InputType::RowsAtCompileTime, - ValuesAtCompileTime = ValueType::RowsAtCompileTime - }; - - typedef Matrix<Scalar, ValuesAtCompileTime, InputsAtCompileTime> JacobianType; - typedef typename JacobianType::Index Index; - - typedef Matrix<Scalar, InputsAtCompileTime, 1> DerivativeType; - typedef AutoDiffScalar<DerivativeType> ActiveScalar; - - typedef Matrix<ActiveScalar, InputsAtCompileTime, 1> ActiveInput; - typedef Matrix<ActiveScalar, ValuesAtCompileTime, 1> ActiveValue; - -#if EIGEN_HAS_VARIADIC_TEMPLATES - // Some compilers don't accept variadic parameters after a default parameter, - // i.e., we can't just write _jac=0 but we need to overload operator(): - EIGEN_STRONG_INLINE - void operator() (const InputType& x, ValueType* v) const - { - this->operator()(x, v, 0); - } - template<typename... ParamsType> - void operator() (const InputType& x, ValueType* v, JacobianType* _jac, - const ParamsType&... 
Params) const -#else - void operator() (const InputType& x, ValueType* v, JacobianType* _jac=0) const -#endif - { - eigen_assert(v!=0); - - if (!_jac) - { -#if EIGEN_HAS_VARIADIC_TEMPLATES - Functor::operator()(x, v, Params...); -#else - Functor::operator()(x, v); -#endif - return; - } - - JacobianType& jac = *_jac; - - ActiveInput ax = x.template cast<ActiveScalar>(); - ActiveValue av(jac.rows()); - - if(InputsAtCompileTime==Dynamic) - for (Index j=0; j<jac.rows(); j++) - av[j].derivatives().resize(x.rows()); - - for (Index i=0; i<jac.cols(); i++) - ax[i].derivatives() = DerivativeType::Unit(x.rows(),i); - -#if EIGEN_HAS_VARIADIC_TEMPLATES - Functor::operator()(ax, &av, Params...); -#else - Functor::operator()(ax, &av); -#endif - - for (Index i=0; i<jac.rows(); i++) - { - (*v)[i] = av[i].value(); - jac.row(i) = av[i].derivatives(); - } - } -}; - -} - -#endif // EIGEN_AUTODIFF_JACOBIAN_H diff --git a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h deleted file mode 100644 index 2f50e99..0000000 --- a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ /dev/null @@ -1,694 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_AUTODIFF_SCALAR_H -#define EIGEN_AUTODIFF_SCALAR_H - -namespace Eigen { - -namespace internal { - -template<typename A, typename B> -struct make_coherent_impl { - static void run(A&, B&) {} -}; - -// resize a to match b is a.size()==0, and conversely. -template<typename A, typename B> -void make_coherent(const A& a, const B&b) -{ - make_coherent_impl<A,B>::run(a.const_cast_derived(), b.const_cast_derived()); -} - -template<typename _DerType, bool Enable> struct auto_diff_special_op; - -} // end namespace internal - -template<typename _DerType> class AutoDiffScalar; - -template<typename NewDerType> -inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) { - return AutoDiffScalar<NewDerType>(value,der); -} - -/** \class AutoDiffScalar - * \brief A scalar type replacement with automatic differentation capability - * - * \param _DerType the vector type used to store/represent the derivatives. The base scalar type - * as well as the number of derivatives to compute are determined from this type. - * Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf - * if the number of derivatives is not known at compile time, and/or, the number - * of derivatives is large. - * Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a - * existing vector into an AutoDiffScalar. - * Finally, _DerType can also be any Eigen compatible expression. - * - * This class represents a scalar value while tracking its respective derivatives using Eigen's expression - * template mechanism. - * - * It supports the following list of global math function: - * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, - * - internal::abs, internal::sqrt, numext::pow, internal::exp, internal::log, internal::sin, internal::cos, - * - internal::conj, internal::real, internal::imag, numext::abs2. - * - * AutoDiffScalar can be used as the scalar type of an Eigen::Matrix object. 
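For instance, a hypothetical snippet with arbitrary seed values, using the (value, number of derivatives, variable index) constructor defined below, packs two active variables into a fixed-size vector and reads a gradient back:

\code
typedef Eigen::AutoDiffScalar<Eigen::Vector2d> AD;   // two derivative directions

Eigen::Matrix<AD, 2, 1> x;
x[0] = AD(1.0, 2, 0);                  // value 1, seeds d/dx0
x[1] = AD(2.0, 2, 1);                  // value 2, seeds d/dx1

AD r = x[0] * x[1] + x[0];             // r.value() == 3
Eigen::Vector2d dr = r.derivatives();  // (x1 + 1, x0) == (3, 1)
\endcode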
However, - * in that case, the expression template mechanism only occurs at the top Matrix level, - * while derivatives are computed right away. - * - */ - -template<typename _DerType> -class AutoDiffScalar - : public internal::auto_diff_special_op - <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar, - typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value> -{ - public: - typedef internal::auto_diff_special_op - <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar, - typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value> Base; - typedef typename internal::remove_all<_DerType>::type DerType; - typedef typename internal::traits<DerType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real Real; - - using Base::operator+; - using Base::operator*; - - /** Default constructor without any initialization. */ - AutoDiffScalar() {} - - /** Constructs an active scalar from its \a value, - and initializes the \a nbDer derivatives such that it corresponds to the \a derNumber -th variable */ - AutoDiffScalar(const Scalar& value, int nbDer, int derNumber) - : m_value(value), m_derivatives(DerType::Zero(nbDer)) - { - m_derivatives.coeffRef(derNumber) = Scalar(1); - } - - /** Conversion from a scalar constant to an active scalar. - * The derivatives are set to zero. */ - /*explicit*/ AutoDiffScalar(const Real& value) - : m_value(value) - { - if(m_derivatives.size()>0) - m_derivatives.setZero(); - } - - /** Constructs an active scalar from its \a value and derivatives \a der */ - AutoDiffScalar(const Scalar& value, const DerType& der) - : m_value(value), m_derivatives(der) - {} - - template<typename OtherDerType> - AutoDiffScalar(const AutoDiffScalar<OtherDerType>& other -#ifndef EIGEN_PARSED_BY_DOXYGEN - , typename internal::enable_if< - internal::is_same<Scalar, typename internal::traits<typename internal::remove_all<OtherDerType>::type>::Scalar>::value - && internal::is_convertible<OtherDerType,DerType>::value , void*>::type = 0 -#endif - ) - : m_value(other.value()), m_derivatives(other.derivatives()) - {} - - friend std::ostream & operator << (std::ostream & s, const AutoDiffScalar& a) - { - return s << a.value(); - } - - AutoDiffScalar(const AutoDiffScalar& other) - : m_value(other.value()), m_derivatives(other.derivatives()) - {} - - template<typename OtherDerType> - inline AutoDiffScalar& operator=(const AutoDiffScalar<OtherDerType>& other) - { - m_value = other.value(); - m_derivatives = other.derivatives(); - return *this; - } - - inline AutoDiffScalar& operator=(const AutoDiffScalar& other) - { - m_value = other.value(); - m_derivatives = other.derivatives(); - return *this; - } - - inline AutoDiffScalar& operator=(const Scalar& other) - { - m_value = other; - if(m_derivatives.size()>0) - m_derivatives.setZero(); - return *this; - } - -// inline operator const Scalar& () const { return m_value; } -// inline operator Scalar& () { return m_value; } - - inline const Scalar& value() const { return m_value; } - inline Scalar& value() { return m_value; } - - inline const DerType& derivatives() const { return m_derivatives; } - inline DerType& derivatives() { return m_derivatives; } - - inline bool operator< (const Scalar& other) const { return m_value < other; } - inline bool operator<=(const Scalar& other) const { return m_value <= other; } - inline bool 
operator> (const Scalar& other) const { return m_value > other; } - inline bool operator>=(const Scalar& other) const { return m_value >= other; } - inline bool operator==(const Scalar& other) const { return m_value == other; } - inline bool operator!=(const Scalar& other) const { return m_value != other; } - - friend inline bool operator< (const Scalar& a, const AutoDiffScalar& b) { return a < b.value(); } - friend inline bool operator<=(const Scalar& a, const AutoDiffScalar& b) { return a <= b.value(); } - friend inline bool operator> (const Scalar& a, const AutoDiffScalar& b) { return a > b.value(); } - friend inline bool operator>=(const Scalar& a, const AutoDiffScalar& b) { return a >= b.value(); } - friend inline bool operator==(const Scalar& a, const AutoDiffScalar& b) { return a == b.value(); } - friend inline bool operator!=(const Scalar& a, const AutoDiffScalar& b) { return a != b.value(); } - - template<typename OtherDerType> inline bool operator< (const AutoDiffScalar<OtherDerType>& b) const { return m_value < b.value(); } - template<typename OtherDerType> inline bool operator<=(const AutoDiffScalar<OtherDerType>& b) const { return m_value <= b.value(); } - template<typename OtherDerType> inline bool operator> (const AutoDiffScalar<OtherDerType>& b) const { return m_value > b.value(); } - template<typename OtherDerType> inline bool operator>=(const AutoDiffScalar<OtherDerType>& b) const { return m_value >= b.value(); } - template<typename OtherDerType> inline bool operator==(const AutoDiffScalar<OtherDerType>& b) const { return m_value == b.value(); } - template<typename OtherDerType> inline bool operator!=(const AutoDiffScalar<OtherDerType>& b) const { return m_value != b.value(); } - - inline const AutoDiffScalar<DerType&> operator+(const Scalar& other) const - { - return AutoDiffScalar<DerType&>(m_value + other, m_derivatives); - } - - friend inline const AutoDiffScalar<DerType&> operator+(const Scalar& a, const AutoDiffScalar& b) - { - return AutoDiffScalar<DerType&>(a + b.value(), b.derivatives()); - } - -// inline const AutoDiffScalar<DerType&> operator+(const Real& other) const -// { -// return AutoDiffScalar<DerType&>(m_value + other, m_derivatives); -// } - -// friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar& b) -// { -// return AutoDiffScalar<DerType&>(a + b.value(), b.derivatives()); -// } - - inline AutoDiffScalar& operator+=(const Scalar& other) - { - value() += other; - return *this; - } - - template<typename OtherDerType> - inline const AutoDiffScalar<CwiseBinaryOp<internal::scalar_sum_op<Scalar>,const DerType,const typename internal::remove_all<OtherDerType>::type> > - operator+(const AutoDiffScalar<OtherDerType>& other) const - { - internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar<CwiseBinaryOp<internal::scalar_sum_op<Scalar>,const DerType,const typename internal::remove_all<OtherDerType>::type> >( - m_value + other.value(), - m_derivatives + other.derivatives()); - } - - template<typename OtherDerType> - inline AutoDiffScalar& - operator+=(const AutoDiffScalar<OtherDerType>& other) - { - (*this) = (*this) + other; - return *this; - } - - inline const AutoDiffScalar<DerType&> operator-(const Scalar& b) const - { - return AutoDiffScalar<DerType&>(m_value - b, m_derivatives); - } - - friend inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const DerType> > - operator-(const Scalar& a, const AutoDiffScalar& b) - { - return 
AutoDiffScalar<CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const DerType> > - (a - b.value(), -b.derivatives()); - } - - inline AutoDiffScalar& operator-=(const Scalar& other) - { - value() -= other; - return *this; - } - - template<typename OtherDerType> - inline const AutoDiffScalar<CwiseBinaryOp<internal::scalar_difference_op<Scalar>, const DerType,const typename internal::remove_all<OtherDerType>::type> > - operator-(const AutoDiffScalar<OtherDerType>& other) const - { - internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar<CwiseBinaryOp<internal::scalar_difference_op<Scalar>, const DerType,const typename internal::remove_all<OtherDerType>::type> >( - m_value - other.value(), - m_derivatives - other.derivatives()); - } - - template<typename OtherDerType> - inline AutoDiffScalar& - operator-=(const AutoDiffScalar<OtherDerType>& other) - { - *this = *this - other; - return *this; - } - - inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const DerType> > - operator-() const - { - return AutoDiffScalar<CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const DerType> >( - -m_value, - -m_derivatives); - } - - inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) > - operator*(const Scalar& other) const - { - return MakeAutoDiffScalar(m_value * other, m_derivatives * other); - } - - friend inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) > - operator*(const Scalar& other, const AutoDiffScalar& a) - { - return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other); - } - -// inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type > -// operator*(const Real& other) const -// { -// return AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >( -// m_value * other, -// (m_derivatives * other)); -// } -// -// friend inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type > -// operator*(const Real& other, const AutoDiffScalar& a) -// { -// return AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >( -// a.value() * other, -// a.derivatives() * other); -// } - - inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) > - operator/(const Scalar& other) const - { - return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other))); - } - - friend inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) > - operator/(const Scalar& other, const AutoDiffScalar& a) - { - return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); - } - -// inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type > -// operator/(const Real& other) const -// { -// return AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >( -// m_value / other, -// (m_derivatives * (Real(1)/other))); -// } -// -// friend inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type > -// operator/(const Real& other, const AutoDiffScalar& a) -// { -// return AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >( -// other / a.value(), -// a.derivatives() * (-Real(1)/other)); -// } - - template<typename OtherDerType> - inline const 
AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE( - CwiseBinaryOp<internal::scalar_difference_op<Scalar> EIGEN_COMMA - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<OtherDerType>::type,Scalar,product) >,Scalar,product) > - operator/(const AutoDiffScalar<OtherDerType>& other) const - { - internal::make_coherent(m_derivatives, other.derivatives()); - return MakeAutoDiffScalar( - m_value / other.value(), - ((m_derivatives * other.value()) - (other.derivatives() * m_value)) - * (Scalar(1)/(other.value()*other.value()))); - } - - template<typename OtherDerType> - inline const AutoDiffScalar<CwiseBinaryOp<internal::scalar_sum_op<Scalar>, - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product), - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<OtherDerType>::type,Scalar,product) > > - operator*(const AutoDiffScalar<OtherDerType>& other) const - { - internal::make_coherent(m_derivatives, other.derivatives()); - return MakeAutoDiffScalar( - m_value * other.value(), - (m_derivatives * other.value()) + (other.derivatives() * m_value)); - } - - inline AutoDiffScalar& operator*=(const Scalar& other) - { - *this = *this * other; - return *this; - } - - template<typename OtherDerType> - inline AutoDiffScalar& operator*=(const AutoDiffScalar<OtherDerType>& other) - { - *this = *this * other; - return *this; - } - - inline AutoDiffScalar& operator/=(const Scalar& other) - { - *this = *this / other; - return *this; - } - - template<typename OtherDerType> - inline AutoDiffScalar& operator/=(const AutoDiffScalar<OtherDerType>& other) - { - *this = *this / other; - return *this; - } - - protected: - Scalar m_value; - DerType m_derivatives; - -}; - -namespace internal { - -template<typename _DerType> -struct auto_diff_special_op<_DerType, true> -// : auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real, -// is_same<Scalar,typename NumTraits<Scalar>::Real>::value> -{ - typedef typename remove_all<_DerType>::type DerType; - typedef typename traits<DerType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real Real; - -// typedef auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real, -// is_same<Scalar,typename NumTraits<Scalar>::Real>::value> Base; - -// using Base::operator+; -// using Base::operator+=; -// using Base::operator-; -// using Base::operator-=; -// using Base::operator*; -// using Base::operator*=; - - const AutoDiffScalar<_DerType>& derived() const { return *static_cast<const AutoDiffScalar<_DerType>*>(this); } - AutoDiffScalar<_DerType>& derived() { return *static_cast<AutoDiffScalar<_DerType>*>(this); } - - - inline const AutoDiffScalar<DerType&> operator+(const Real& other) const - { - return AutoDiffScalar<DerType&>(derived().value() + other, derived().derivatives()); - } - - friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar<_DerType>& b) - { - return AutoDiffScalar<DerType&>(a + b.value(), b.derivatives()); - } - - inline AutoDiffScalar<_DerType>& operator+=(const Real& other) - { - derived().value() += other; - return derived(); - } - - - inline const AutoDiffScalar<typename CwiseUnaryOp<bind2nd_op<scalar_product_op<Scalar,Real> >, DerType>::Type > - operator*(const Real& other) const - { - return AutoDiffScalar<typename CwiseUnaryOp<bind2nd_op<scalar_product_op<Scalar,Real> >, DerType>::Type >( - derived().value() * other, - derived().derivatives() * other); - } - - 
friend inline const AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type > - operator*(const Real& other, const AutoDiffScalar<_DerType>& a) - { - return AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >( - a.value() * other, - a.derivatives() * other); - } - - inline AutoDiffScalar<_DerType>& operator*=(const Scalar& other) - { - *this = *this * other; - return derived(); - } -}; - -template<typename _DerType> -struct auto_diff_special_op<_DerType, false> -{ - void operator*() const; - void operator-() const; - void operator+() const; -}; - -template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename B> -struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>, B> { - typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A; - static void run(A& a, B& b) { - if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) - { - a.resize(b.size()); - a.setZero(); - } - } -}; - -template<typename A, typename B_Scalar, int B_Rows, int B_Cols, int B_Options, int B_MaxRows, int B_MaxCols> -struct make_coherent_impl<A, Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> > { - typedef Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> B; - static void run(A& a, B& b) { - if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) - { - b.resize(a.size()); - b.setZero(); - } - } -}; - -template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, - typename B_Scalar, int B_Rows, int B_Cols, int B_Options, int B_MaxRows, int B_MaxCols> -struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>, - Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> > { - typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A; - typedef Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> B; - static void run(A& a, B& b) { - if((A_Rows==Dynamic || A_Cols==Dynamic) && (a.size()==0)) - { - a.resize(b.size()); - a.setZero(); - } - else if((B_Rows==Dynamic || B_Cols==Dynamic) && (b.size()==0)) - { - b.resize(a.size()); - b.setZero(); - } - } -}; - -} // end namespace internal - -template<typename DerType, typename BinOp> -struct ScalarBinaryOpTraits<AutoDiffScalar<DerType>,typename DerType::Scalar,BinOp> -{ - typedef AutoDiffScalar<DerType> ReturnType; -}; - -template<typename DerType, typename BinOp> -struct ScalarBinaryOpTraits<typename DerType::Scalar,AutoDiffScalar<DerType>, BinOp> -{ - typedef AutoDiffScalar<DerType> ReturnType; -}; - - -// The following is an attempt to let Eigen's known about expression template, but that's more tricky! 
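At this point both AutoDiffScalar and the AutoDiffJacobian wrapper from earlier in this diff are available, so a functor-level sketch is possible. Everything in it is made up for illustration (the functor name, its formulas, the input values); only the required typedefs and the templated operator() shape matter, and the umbrella header is assumed to be <unsupported/Eigen/AutoDiff>.

\code
#include <unsupported/Eigen/AutoDiff>

// Hypothetical functor mapping R^2 -> R^2.
struct TestFunctor
{
  typedef Eigen::Vector2d InputType;
  typedef Eigen::Vector2d ValueType;

  // Templated so it can be evaluated with double and with AutoDiffScalar entries.
  template<typename T>
  void operator()(const Eigen::Matrix<T, 2, 1>& x, Eigen::Matrix<T, 2, 1>* v) const
  {
    using std::sin;
    (*v)[0] = x[0] * x[1];
    (*v)[1] = sin(x[0]) + x[1] * x[1];
  }
};

void jacobian_example()
{
  Eigen::AutoDiffJacobian<TestFunctor> autoj;
  Eigen::Vector2d x(1.0, 2.0), y;
  Eigen::Matrix2d jac;          // matches AutoDiffJacobian<TestFunctor>::JacobianType
  autoj(x, &y, &jac);           // y = f(x), jac = df/dx evaluated at x
}
\endcode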
- -// template<typename DerType, typename BinOp> -// struct ScalarBinaryOpTraits<AutoDiffScalar<DerType>,AutoDiffScalar<DerType>, BinOp> -// { -// enum { Defined = 1 }; -// typedef AutoDiffScalar<typename DerType::PlainObject> ReturnType; -// }; -// -// template<typename DerType1,typename DerType2, typename BinOp> -// struct ScalarBinaryOpTraits<AutoDiffScalar<DerType1>,AutoDiffScalar<DerType2>, BinOp> -// { -// enum { Defined = 1 };//internal::is_same<typename DerType1::Scalar,typename DerType2::Scalar>::value }; -// typedef AutoDiffScalar<typename DerType1::PlainObject> ReturnType; -// }; - -#define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ - template<typename DerType> \ - inline const Eigen::AutoDiffScalar< \ - EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all<DerType>::type, typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar, product) > \ - FUNC(const Eigen::AutoDiffScalar<DerType>& x) { \ - using namespace Eigen; \ - typedef typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar Scalar; \ - EIGEN_UNUSED_VARIABLE(sizeof(Scalar)); \ - CODE; \ - } - -template<typename DerType> -inline const AutoDiffScalar<DerType>& conj(const AutoDiffScalar<DerType>& x) { return x; } -template<typename DerType> -inline const AutoDiffScalar<DerType>& real(const AutoDiffScalar<DerType>& x) { return x; } -template<typename DerType> -inline typename DerType::Scalar imag(const AutoDiffScalar<DerType>&) { return 0.; } -template<typename DerType, typename T> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const AutoDiffScalar<DerType>& x, const T& y) { - typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS; - return (x <= y ? ADS(x) : ADS(y)); -} -template<typename DerType, typename T> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const AutoDiffScalar<DerType>& x, const T& y) { - typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS; - return (x >= y ? ADS(x) : ADS(y)); -} -template<typename DerType, typename T> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const T& x, const AutoDiffScalar<DerType>& y) { - typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS; - return (x < y ? ADS(x) : ADS(y)); -} -template<typename DerType, typename T> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const T& x, const AutoDiffScalar<DerType>& y) { - typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS; - return (x > y ? ADS(x) : ADS(y)); -} -template<typename DerType> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) { - return (x.value() < y.value() ? x : y); -} -template<typename DerType> -inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) { - return (x.value() >= y.value() ? x : y); -} - - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, - using std::abs; - return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? 
-1 : 1) );) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2, - using numext::abs2; - return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, - using std::sqrt; - Scalar sqrtx = sqrt(x.value()); - return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, - using std::cos; - using std::sin; - return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, - using std::sin; - using std::cos; - return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, - using std::exp; - Scalar expx = exp(x.value()); - return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, - using std::log; - return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) - -template<typename DerType> -inline const Eigen::AutoDiffScalar< -EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<DerType>::type,typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar,product) > -pow(const Eigen::AutoDiffScalar<DerType> &x, const typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar &y) -{ - using namespace Eigen; - using std::pow; - return Eigen::MakeAutoDiffScalar(pow(x.value(),y), x.derivatives() * (y * pow(x.value(),y-1))); -} - - -template<typename DerTypeA,typename DerTypeB> -inline const AutoDiffScalar<Matrix<typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar,Dynamic,1> > -atan2(const AutoDiffScalar<DerTypeA>& a, const AutoDiffScalar<DerTypeB>& b) -{ - using std::atan2; - typedef typename internal::traits<typename internal::remove_all<DerTypeA>::type>::Scalar Scalar; - typedef AutoDiffScalar<Matrix<Scalar,Dynamic,1> > PlainADS; - PlainADS ret; - ret.value() = atan2(a.value(), b.value()); - - Scalar squared_hypot = a.value() * a.value() + b.value() * b.value(); - - // if (squared_hypot==0) the derivation is undefined and the following results in a NaN: - ret.derivatives() = (a.derivatives() * b.value() - a.value() * b.derivatives()) / squared_hypot; - - return ret; -} - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan, - using std::tan; - using std::cos; - return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin, - using std::sqrt; - using std::asin; - return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, - using std::sqrt; - using std::acos; - return Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tanh, - using std::cosh; - using std::tanh; - return Eigen::MakeAutoDiffScalar(tanh(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cosh(x.value()))));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sinh, - using std::sinh; - using std::cosh; - return Eigen::MakeAutoDiffScalar(sinh(x.value()),x.derivatives() * cosh(x.value()));) - -EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cosh, - using std::sinh; - using std::cosh; - return Eigen::MakeAutoDiffScalar(cosh(x.value()),x.derivatives() * sinh(x.value()));) - -#undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY - -template<typename DerType> struct 
NumTraits<AutoDiffScalar<DerType> > - : NumTraits< typename NumTraits<typename internal::remove_all<DerType>::type::Scalar>::Real > -{ - typedef typename internal::remove_all<DerType>::type DerTypeCleaned; - typedef AutoDiffScalar<Matrix<typename NumTraits<typename DerTypeCleaned::Scalar>::Real,DerTypeCleaned::RowsAtCompileTime,DerTypeCleaned::ColsAtCompileTime, - 0, DerTypeCleaned::MaxRowsAtCompileTime, DerTypeCleaned::MaxColsAtCompileTime> > Real; - typedef AutoDiffScalar<DerType> NonInteger; - typedef AutoDiffScalar<DerType> Nested; - typedef typename NumTraits<typename DerTypeCleaned::Scalar>::Literal Literal; - enum{ - RequireInitialization = 1 - }; -}; - -} - -namespace std { -template <typename T> -class numeric_limits<Eigen::AutoDiffScalar<T> > - : public numeric_limits<typename T::Scalar> {}; - -} // namespace std - -#endif // EIGEN_AUTODIFF_SCALAR_H diff --git a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h b/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h deleted file mode 100644 index 8c2d048..0000000 --- a/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h +++ /dev/null @@ -1,220 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_AUTODIFF_VECTOR_H -#define EIGEN_AUTODIFF_VECTOR_H - -namespace Eigen { - -/* \class AutoDiffScalar - * \brief A scalar type replacement with automatic differentation capability - * - * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f) - * - * This class represents a scalar value while tracking its respective derivatives. - * - * It supports the following list of global math function: - * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, - * - internal::abs, internal::sqrt, numext::pow, internal::exp, internal::log, internal::sin, internal::cos, - * - internal::conj, internal::real, internal::imag, numext::abs2. - * - * AutoDiffScalar can be used as the scalar type of an Eigen::Matrix object. However, - * in that case, the expression template mechanism only occurs at the top Matrix level, - * while derivatives are computed right away. 
- * - */ -template<typename ValueType, typename JacobianType> -class AutoDiffVector -{ - public: - //typedef typename internal::traits<ValueType>::Scalar Scalar; - typedef typename internal::traits<ValueType>::Scalar BaseScalar; - typedef AutoDiffScalar<Matrix<BaseScalar,JacobianType::RowsAtCompileTime,1> > ActiveScalar; - typedef ActiveScalar Scalar; - typedef AutoDiffScalar<typename JacobianType::ColXpr> CoeffType; - typedef typename JacobianType::Index Index; - - inline AutoDiffVector() {} - - inline AutoDiffVector(const ValueType& values) - : m_values(values) - { - m_jacobian.setZero(); - } - - - CoeffType operator[] (Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } - const CoeffType operator[] (Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } - - CoeffType operator() (Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } - const CoeffType operator() (Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } - - CoeffType coeffRef(Index i) { return CoeffType(m_values[i], m_jacobian.col(i)); } - const CoeffType coeffRef(Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); } - - Index size() const { return m_values.size(); } - - // FIXME here we could return an expression of the sum - Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); } - - - inline AutoDiffVector(const ValueType& values, const JacobianType& jac) - : m_values(values), m_jacobian(jac) - {} - - template<typename OtherValueType, typename OtherJacobianType> - inline AutoDiffVector(const AutoDiffVector<OtherValueType, OtherJacobianType>& other) - : m_values(other.values()), m_jacobian(other.jacobian()) - {} - - inline AutoDiffVector(const AutoDiffVector& other) - : m_values(other.values()), m_jacobian(other.jacobian()) - {} - - template<typename OtherValueType, typename OtherJacobianType> - inline AutoDiffVector& operator=(const AutoDiffVector<OtherValueType, OtherJacobianType>& other) - { - m_values = other.values(); - m_jacobian = other.jacobian(); - return *this; - } - - inline AutoDiffVector& operator=(const AutoDiffVector& other) - { - m_values = other.values(); - m_jacobian = other.jacobian(); - return *this; - } - - inline const ValueType& values() const { return m_values; } - inline ValueType& values() { return m_values; } - - inline const JacobianType& jacobian() const { return m_jacobian; } - inline JacobianType& jacobian() { return m_jacobian; } - - template<typename OtherValueType,typename OtherJacobianType> - inline const AutoDiffVector< - typename MakeCwiseBinaryOp<internal::scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type, - typename MakeCwiseBinaryOp<internal::scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type > - operator+(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const - { - return AutoDiffVector< - typename MakeCwiseBinaryOp<internal::scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type, - typename MakeCwiseBinaryOp<internal::scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >( - m_values + other.values(), - m_jacobian + other.jacobian()); - } - - template<typename OtherValueType, typename OtherJacobianType> - inline AutoDiffVector& - operator+=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) - { - m_values += other.values(); - m_jacobian += other.jacobian(); - return *this; - } - - template<typename OtherValueType,typename OtherJacobianType> - inline 
const AutoDiffVector< - typename MakeCwiseBinaryOp<internal::scalar_difference_op<Scalar>,ValueType,OtherValueType>::Type, - typename MakeCwiseBinaryOp<internal::scalar_difference_op<Scalar>,JacobianType,OtherJacobianType>::Type > - operator-(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const - { - return AutoDiffVector< - typename MakeCwiseBinaryOp<internal::scalar_difference_op<Scalar>,ValueType,OtherValueType>::Type, - typename MakeCwiseBinaryOp<internal::scalar_difference_op<Scalar>,JacobianType,OtherJacobianType>::Type >( - m_values - other.values(), - m_jacobian - other.jacobian()); - } - - template<typename OtherValueType, typename OtherJacobianType> - inline AutoDiffVector& - operator-=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) - { - m_values -= other.values(); - m_jacobian -= other.jacobian(); - return *this; - } - - inline const AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, JacobianType>::Type > - operator-() const - { - return AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, JacobianType>::Type >( - -m_values, - -m_jacobian); - } - - inline const AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>::Type> - operator*(const BaseScalar& other) const - { - return AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>::Type >( - m_values * other, - m_jacobian * other); - } - - friend inline const AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>::Type > - operator*(const Scalar& other, const AutoDiffVector& v) - { - return AutoDiffVector< - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, ValueType>::Type, - typename MakeCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>::Type >( - v.values() * other, - v.jacobian() * other); - } - -// template<typename OtherValueType,typename OtherJacobianType> -// inline const AutoDiffVector< -// CwiseBinaryOp<internal::scalar_multiple_op<Scalar>, ValueType, OtherValueType> -// CwiseBinaryOp<internal::scalar_sum_op<Scalar>, -// CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>, -// CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, OtherJacobianType> > > -// operator*(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const -// { -// return AutoDiffVector< -// CwiseBinaryOp<internal::scalar_multiple_op<Scalar>, ValueType, OtherValueType> -// CwiseBinaryOp<internal::scalar_sum_op<Scalar>, -// CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, JacobianType>, -// CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, OtherJacobianType> > >( -// m_values.cwise() * other.values(), -// (m_jacobian * other.values()) + (m_values * other.jacobian())); -// } - - inline AutoDiffVector& operator*=(const Scalar& other) - { - m_values *= other; - m_jacobian *= other; - return *this; - } - - template<typename OtherValueType,typename OtherJacobianType> - inline AutoDiffVector& operator*=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) - { - *this = 
*this * other; - return *this; - } - - protected: - ValueType m_values; - JacobianType m_jacobian; - -}; - -} - -#endif // EIGEN_AUTODIFF_VECTOR_H diff --git a/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h b/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h deleted file mode 100644 index 994c8af..0000000 --- a/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h +++ /dev/null @@ -1,293 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Ilya Baran <ibaran@mit.edu> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BVALGORITHMS_H -#define EIGEN_BVALGORITHMS_H - -namespace Eigen { - -namespace internal { - -#ifndef EIGEN_PARSED_BY_DOXYGEN -template<typename BVH, typename Intersector> -bool intersect_helper(const BVH &tree, Intersector &intersector, typename BVH::Index root) -{ - typedef typename BVH::Index Index; - typedef typename BVH::VolumeIterator VolIter; - typedef typename BVH::ObjectIterator ObjIter; - - VolIter vBegin = VolIter(), vEnd = VolIter(); - ObjIter oBegin = ObjIter(), oEnd = ObjIter(); - - std::vector<Index> todo(1, root); - - while(!todo.empty()) { - tree.getChildren(todo.back(), vBegin, vEnd, oBegin, oEnd); - todo.pop_back(); - - for(; vBegin != vEnd; ++vBegin) //go through child volumes - if(intersector.intersectVolume(tree.getVolume(*vBegin))) - todo.push_back(*vBegin); - - for(; oBegin != oEnd; ++oBegin) //go through child objects - if(intersector.intersectObject(*oBegin)) - return true; //intersector said to stop query - } - return false; -} -#endif //not EIGEN_PARSED_BY_DOXYGEN - -template<typename Volume1, typename Object1, typename Object2, typename Intersector> -struct intersector_helper1 -{ - intersector_helper1(const Object2 &inStored, Intersector &in) : stored(inStored), intersector(in) {} - bool intersectVolume(const Volume1 &vol) { return intersector.intersectVolumeObject(vol, stored); } - bool intersectObject(const Object1 &obj) { return intersector.intersectObjectObject(obj, stored); } - Object2 stored; - Intersector &intersector; -private: - intersector_helper1& operator=(const intersector_helper1&); -}; - -template<typename Volume2, typename Object2, typename Object1, typename Intersector> -struct intersector_helper2 -{ - intersector_helper2(const Object1 &inStored, Intersector &in) : stored(inStored), intersector(in) {} - bool intersectVolume(const Volume2 &vol) { return intersector.intersectObjectVolume(stored, vol); } - bool intersectObject(const Object2 &obj) { return intersector.intersectObjectObject(stored, obj); } - Object1 stored; - Intersector &intersector; -private: - intersector_helper2& operator=(const intersector_helper2&); -}; - -} // end namespace internal - -/** Given a BVH, runs the query encapsulated by \a intersector. - * The Intersector type must provide the following members: \code - bool intersectVolume(const BVH::Volume &volume) //returns true if volume intersects the query - bool intersectObject(const BVH::Object &object) //returns true if the search should terminate immediately - \endcode - */ -template<typename BVH, typename Intersector> -void BVIntersect(const BVH &tree, Intersector &intersector) -{ - internal::intersect_helper(tree, intersector, tree.getRootIndex()); -} - -/** Given two BVH's, runs the query on their Cartesian product encapsulated by \a intersector. 
- * The Intersector type must provide the following members: \code - bool intersectVolumeVolume(const BVH1::Volume &v1, const BVH2::Volume &v2) //returns true if product of volumes intersects the query - bool intersectVolumeObject(const BVH1::Volume &v1, const BVH2::Object &o2) //returns true if the volume-object product intersects the query - bool intersectObjectVolume(const BVH1::Object &o1, const BVH2::Volume &v2) //returns true if the volume-object product intersects the query - bool intersectObjectObject(const BVH1::Object &o1, const BVH2::Object &o2) //returns true if the search should terminate immediately - \endcode - */ -template<typename BVH1, typename BVH2, typename Intersector> -void BVIntersect(const BVH1 &tree1, const BVH2 &tree2, Intersector &intersector) //TODO: tandem descent when it makes sense -{ - typedef typename BVH1::Index Index1; - typedef typename BVH2::Index Index2; - typedef internal::intersector_helper1<typename BVH1::Volume, typename BVH1::Object, typename BVH2::Object, Intersector> Helper1; - typedef internal::intersector_helper2<typename BVH2::Volume, typename BVH2::Object, typename BVH1::Object, Intersector> Helper2; - typedef typename BVH1::VolumeIterator VolIter1; - typedef typename BVH1::ObjectIterator ObjIter1; - typedef typename BVH2::VolumeIterator VolIter2; - typedef typename BVH2::ObjectIterator ObjIter2; - - VolIter1 vBegin1 = VolIter1(), vEnd1 = VolIter1(); - ObjIter1 oBegin1 = ObjIter1(), oEnd1 = ObjIter1(); - VolIter2 vBegin2 = VolIter2(), vEnd2 = VolIter2(), vCur2 = VolIter2(); - ObjIter2 oBegin2 = ObjIter2(), oEnd2 = ObjIter2(), oCur2 = ObjIter2(); - - std::vector<std::pair<Index1, Index2> > todo(1, std::make_pair(tree1.getRootIndex(), tree2.getRootIndex())); - - while(!todo.empty()) { - tree1.getChildren(todo.back().first, vBegin1, vEnd1, oBegin1, oEnd1); - tree2.getChildren(todo.back().second, vBegin2, vEnd2, oBegin2, oEnd2); - todo.pop_back(); - - for(; vBegin1 != vEnd1; ++vBegin1) { //go through child volumes of first tree - const typename BVH1::Volume &vol1 = tree1.getVolume(*vBegin1); - for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree - if(intersector.intersectVolumeVolume(vol1, tree2.getVolume(*vCur2))) - todo.push_back(std::make_pair(*vBegin1, *vCur2)); - } - - for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree - Helper1 helper(*oCur2, intersector); - if(internal::intersect_helper(tree1, helper, *vBegin1)) - return; //intersector said to stop query - } - } - - for(; oBegin1 != oEnd1; ++oBegin1) { //go through child objects of first tree - for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree - Helper2 helper(*oBegin1, intersector); - if(internal::intersect_helper(tree2, helper, *vCur2)) - return; //intersector said to stop query - } - - for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree - if(intersector.intersectObjectObject(*oBegin1, *oCur2)) - return; //intersector said to stop query - } - } - } -} - -namespace internal { - -#ifndef EIGEN_PARSED_BY_DOXYGEN -template<typename BVH, typename Minimizer> -typename Minimizer::Scalar minimize_helper(const BVH &tree, Minimizer &minimizer, typename BVH::Index root, typename Minimizer::Scalar minimum) -{ - typedef typename Minimizer::Scalar Scalar; - typedef typename BVH::Index Index; - typedef std::pair<Scalar, Index> QueueElement; //first element is priority - typedef typename BVH::VolumeIterator VolIter; - typedef typename 
BVH::ObjectIterator ObjIter; - - VolIter vBegin = VolIter(), vEnd = VolIter(); - ObjIter oBegin = ObjIter(), oEnd = ObjIter(); - std::priority_queue<QueueElement, std::vector<QueueElement>, std::greater<QueueElement> > todo; //smallest is at the top - - todo.push(std::make_pair(Scalar(), root)); - - while(!todo.empty()) { - tree.getChildren(todo.top().second, vBegin, vEnd, oBegin, oEnd); - todo.pop(); - - for(; oBegin != oEnd; ++oBegin) //go through child objects - minimum = (std::min)(minimum, minimizer.minimumOnObject(*oBegin)); - - for(; vBegin != vEnd; ++vBegin) { //go through child volumes - Scalar val = minimizer.minimumOnVolume(tree.getVolume(*vBegin)); - if(val < minimum) - todo.push(std::make_pair(val, *vBegin)); - } - } - - return minimum; -} -#endif //not EIGEN_PARSED_BY_DOXYGEN - - -template<typename Volume1, typename Object1, typename Object2, typename Minimizer> -struct minimizer_helper1 -{ - typedef typename Minimizer::Scalar Scalar; - minimizer_helper1(const Object2 &inStored, Minimizer &m) : stored(inStored), minimizer(m) {} - Scalar minimumOnVolume(const Volume1 &vol) { return minimizer.minimumOnVolumeObject(vol, stored); } - Scalar minimumOnObject(const Object1 &obj) { return minimizer.minimumOnObjectObject(obj, stored); } - Object2 stored; - Minimizer &minimizer; -private: - minimizer_helper1& operator=(const minimizer_helper1&); -}; - -template<typename Volume2, typename Object2, typename Object1, typename Minimizer> -struct minimizer_helper2 -{ - typedef typename Minimizer::Scalar Scalar; - minimizer_helper2(const Object1 &inStored, Minimizer &m) : stored(inStored), minimizer(m) {} - Scalar minimumOnVolume(const Volume2 &vol) { return minimizer.minimumOnObjectVolume(stored, vol); } - Scalar minimumOnObject(const Object2 &obj) { return minimizer.minimumOnObjectObject(stored, obj); } - Object1 stored; - Minimizer &minimizer; -private: - minimizer_helper2& operator=(const minimizer_helper2&); -}; - -} // end namespace internal - -/** Given a BVH, runs the query encapsulated by \a minimizer. - * \returns the minimum value. - * The Minimizer type must provide the following members: \code - typedef Scalar //the numeric type of what is being minimized--not necessarily the Scalar type of the BVH (if it has one) - Scalar minimumOnVolume(const BVH::Volume &volume) - Scalar minimumOnObject(const BVH::Object &object) - \endcode - */ -template<typename BVH, typename Minimizer> -typename Minimizer::Scalar BVMinimize(const BVH &tree, Minimizer &minimizer) -{ - return internal::minimize_helper(tree, minimizer, tree.getRootIndex(), (std::numeric_limits<typename Minimizer::Scalar>::max)()); -} - -/** Given two BVH's, runs the query on their cartesian product encapsulated by \a minimizer. - * \returns the minimum value. 
- * The Minimizer type must provide the following members: \code - typedef Scalar //the numeric type of what is being minimized--not necessarily the Scalar type of the BVH (if it has one) - Scalar minimumOnVolumeVolume(const BVH1::Volume &v1, const BVH2::Volume &v2) - Scalar minimumOnVolumeObject(const BVH1::Volume &v1, const BVH2::Object &o2) - Scalar minimumOnObjectVolume(const BVH1::Object &o1, const BVH2::Volume &v2) - Scalar minimumOnObjectObject(const BVH1::Object &o1, const BVH2::Object &o2) - \endcode - */ -template<typename BVH1, typename BVH2, typename Minimizer> -typename Minimizer::Scalar BVMinimize(const BVH1 &tree1, const BVH2 &tree2, Minimizer &minimizer) -{ - typedef typename Minimizer::Scalar Scalar; - typedef typename BVH1::Index Index1; - typedef typename BVH2::Index Index2; - typedef internal::minimizer_helper1<typename BVH1::Volume, typename BVH1::Object, typename BVH2::Object, Minimizer> Helper1; - typedef internal::minimizer_helper2<typename BVH2::Volume, typename BVH2::Object, typename BVH1::Object, Minimizer> Helper2; - typedef std::pair<Scalar, std::pair<Index1, Index2> > QueueElement; //first element is priority - typedef typename BVH1::VolumeIterator VolIter1; - typedef typename BVH1::ObjectIterator ObjIter1; - typedef typename BVH2::VolumeIterator VolIter2; - typedef typename BVH2::ObjectIterator ObjIter2; - - VolIter1 vBegin1 = VolIter1(), vEnd1 = VolIter1(); - ObjIter1 oBegin1 = ObjIter1(), oEnd1 = ObjIter1(); - VolIter2 vBegin2 = VolIter2(), vEnd2 = VolIter2(), vCur2 = VolIter2(); - ObjIter2 oBegin2 = ObjIter2(), oEnd2 = ObjIter2(), oCur2 = ObjIter2(); - std::priority_queue<QueueElement, std::vector<QueueElement>, std::greater<QueueElement> > todo; //smallest is at the top - - Scalar minimum = (std::numeric_limits<Scalar>::max)(); - todo.push(std::make_pair(Scalar(), std::make_pair(tree1.getRootIndex(), tree2.getRootIndex()))); - - while(!todo.empty()) { - tree1.getChildren(todo.top().second.first, vBegin1, vEnd1, oBegin1, oEnd1); - tree2.getChildren(todo.top().second.second, vBegin2, vEnd2, oBegin2, oEnd2); - todo.pop(); - - for(; oBegin1 != oEnd1; ++oBegin1) { //go through child objects of first tree - for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree - minimum = (std::min)(minimum, minimizer.minimumOnObjectObject(*oBegin1, *oCur2)); - } - - for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree - Helper2 helper(*oBegin1, minimizer); - minimum = (std::min)(minimum, internal::minimize_helper(tree2, helper, *vCur2, minimum)); - } - } - - for(; vBegin1 != vEnd1; ++vBegin1) { //go through child volumes of first tree - const typename BVH1::Volume &vol1 = tree1.getVolume(*vBegin1); - - for(oCur2 = oBegin2; oCur2 != oEnd2; ++oCur2) {//go through child objects of second tree - Helper1 helper(*oCur2, minimizer); - minimum = (std::min)(minimum, internal::minimize_helper(tree1, helper, *vBegin1, minimum)); - } - - for(vCur2 = vBegin2; vCur2 != vEnd2; ++vCur2) { //go through child volumes of second tree - Scalar val = minimizer.minimumOnVolumeVolume(vol1, tree2.getVolume(*vCur2)); - if(val < minimum) - todo.push(std::make_pair(val, std::make_pair(*vBegin1, *vCur2))); - } - } - } - return minimum; -} - -} // end namespace Eigen - -#endif // EIGEN_BVALGORITHMS_H diff --git a/eigen/unsupported/Eigen/src/BVH/KdBVH.h b/eigen/unsupported/Eigen/src/BVH/KdBVH.h deleted file mode 100644 index 5e39af2..0000000 --- a/eigen/unsupported/Eigen/src/BVH/KdBVH.h +++ /dev/null @@ -1,223 +0,0 @@ -// This file is 
part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Ilya Baran <ibaran@mit.edu> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef KDBVH_H_INCLUDED -#define KDBVH_H_INCLUDED - -namespace Eigen { - -namespace internal { - -//internal pair class for the BVH--used instead of std::pair because of alignment -template<typename Scalar, int Dim> -struct vector_int_pair -{ -EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar, Dim) - typedef Matrix<Scalar, Dim, 1> VectorType; - - vector_int_pair(const VectorType &v, int i) : first(v), second(i) {} - - VectorType first; - int second; -}; - -//these templates help the tree initializer get the bounding boxes either from a provided -//iterator range or using bounding_box in a unified way -template<typename ObjectList, typename VolumeList, typename BoxIter> -struct get_boxes_helper { - void operator()(const ObjectList &objects, BoxIter boxBegin, BoxIter boxEnd, VolumeList &outBoxes) - { - outBoxes.insert(outBoxes.end(), boxBegin, boxEnd); - eigen_assert(outBoxes.size() == objects.size()); - EIGEN_ONLY_USED_FOR_DEBUG(objects); - } -}; - -template<typename ObjectList, typename VolumeList> -struct get_boxes_helper<ObjectList, VolumeList, int> { - void operator()(const ObjectList &objects, int, int, VolumeList &outBoxes) - { - outBoxes.reserve(objects.size()); - for(int i = 0; i < (int)objects.size(); ++i) - outBoxes.push_back(bounding_box(objects[i])); - } -}; - -} // end namespace internal - - -/** \class KdBVH - * \brief A simple bounding volume hierarchy based on AlignedBox - * - * \param _Scalar The underlying scalar type of the bounding boxes - * \param _Dim The dimension of the space in which the hierarchy lives - * \param _Object The object type that lives in the hierarchy. It must have value semantics. Either bounding_box(_Object) must - * be defined and return an AlignedBox<_Scalar, _Dim> or bounding boxes must be provided to the tree initializer. - * - * This class provides a simple (as opposed to optimized) implementation of a bounding volume hierarchy analogous to a Kd-tree. - * Given a sequence of objects, it computes their bounding boxes, constructs a Kd-tree of their centers - * and builds a BVH with the structure of that Kd-tree. When the elements of the tree are too expensive to be copied around, - * it is useful for _Object to be a pointer. - */ -template<typename _Scalar, int _Dim, typename _Object> class KdBVH -{ -public: - enum { Dim = _Dim }; - typedef _Object Object; - typedef std::vector<Object, aligned_allocator<Object> > ObjectList; - typedef _Scalar Scalar; - typedef AlignedBox<Scalar, Dim> Volume; - typedef std::vector<Volume, aligned_allocator<Volume> > VolumeList; - typedef int Index; - typedef const int *VolumeIterator; //the iterators are just pointers into the tree's vectors - typedef const Object *ObjectIterator; - - KdBVH() {} - - /** Given an iterator range over \a Object references, constructs the BVH. Requires that bounding_box(Object) return a Volume. 
*/ - template<typename Iter> KdBVH(Iter begin, Iter end) { init(begin, end, 0, 0); } //int is recognized by init as not being an iterator type - - /** Given an iterator range over \a Object references and an iterator range over their bounding boxes, constructs the BVH */ - template<typename OIter, typename BIter> KdBVH(OIter begin, OIter end, BIter boxBegin, BIter boxEnd) { init(begin, end, boxBegin, boxEnd); } - - /** Given an iterator range over \a Object references, constructs the BVH, overwriting whatever is in there currently. - * Requires that bounding_box(Object) return a Volume. */ - template<typename Iter> void init(Iter begin, Iter end) { init(begin, end, 0, 0); } - - /** Given an iterator range over \a Object references and an iterator range over their bounding boxes, - * constructs the BVH, overwriting whatever is in there currently. */ - template<typename OIter, typename BIter> void init(OIter begin, OIter end, BIter boxBegin, BIter boxEnd) - { - objects.clear(); - boxes.clear(); - children.clear(); - - objects.insert(objects.end(), begin, end); - int n = static_cast<int>(objects.size()); - - if(n < 2) - return; //if we have at most one object, we don't need any internal nodes - - VolumeList objBoxes; - VIPairList objCenters; - - //compute the bounding boxes depending on BIter type - internal::get_boxes_helper<ObjectList, VolumeList, BIter>()(objects, boxBegin, boxEnd, objBoxes); - - objCenters.reserve(n); - boxes.reserve(n - 1); - children.reserve(2 * n - 2); - - for(int i = 0; i < n; ++i) - objCenters.push_back(VIPair(objBoxes[i].center(), i)); - - build(objCenters, 0, n, objBoxes, 0); //the recursive part of the algorithm - - ObjectList tmp(n); - tmp.swap(objects); - for(int i = 0; i < n; ++i) - objects[i] = tmp[objCenters[i].second]; - } - - /** \returns the index of the root of the hierarchy */ - inline Index getRootIndex() const { return (int)boxes.size() - 1; } - - /** Given an \a index of a node, on exit, \a outVBegin and \a outVEnd range over the indices of the volume children of the node - * and \a outOBegin and \a outOEnd range over the object children of the node */ - EIGEN_STRONG_INLINE void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, - ObjectIterator &outOBegin, ObjectIterator &outOEnd) const - { //inlining this function should open lots of optimization opportunities to the compiler - if(index < 0) { - outVBegin = outVEnd; - if(!objects.empty()) - outOBegin = &(objects[0]); - outOEnd = outOBegin + objects.size(); //output all objects--necessary when the tree has only one object - return; - } - - int numBoxes = static_cast<int>(boxes.size()); - - int idx = index * 2; - if(children[idx + 1] < numBoxes) { //second index is always bigger - outVBegin = &(children[idx]); - outVEnd = outVBegin + 2; - outOBegin = outOEnd; - } - else if(children[idx] >= numBoxes) { //if both children are objects - outVBegin = outVEnd; - outOBegin = &(objects[children[idx] - numBoxes]); - outOEnd = outOBegin + 2; - } else { //if the first child is a volume and the second is an object - outVBegin = &(children[idx]); - outVEnd = outVBegin + 1; - outOBegin = &(objects[children[idx + 1] - numBoxes]); - outOEnd = outOBegin + 1; - } - } - - /** \returns the bounding box of the node at \a index */ - inline const Volume &getVolume(Index index) const - { - return boxes[index]; - } - -private: - typedef internal::vector_int_pair<Scalar, Dim> VIPair; - typedef std::vector<VIPair, aligned_allocator<VIPair> > VIPairList; - typedef Matrix<Scalar, Dim, 1> VectorType; - 
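As a concrete illustration of the BVH interfaces documented above, here is a minimal usage sketch: it builds a KdBVH over plain 3D points and runs a BVMinimize nearest-point query. The bounding_box overload and the SquaredDistMinimizer functor are assumptions made for this sketch only, not names provided by the library.

#include <iostream>
#include <vector>
#include <Eigen/Dense>
#include <unsupported/Eigen/BVH>

// KdBVH obtains a volume for each stored object from bounding_box();
// for raw points a degenerate box around the point is sufficient.
namespace Eigen {
  AlignedBox3d bounding_box(const Vector3d &p) { return AlignedBox3d(p, p); }
}

// Minimizer matching the interface described earlier: squared distance
// from a fixed query point to volumes and to objects.
struct SquaredDistMinimizer // hypothetical helper, for illustration only
{
  typedef double Scalar;
  Eigen::Vector3d query;
  Scalar minimumOnVolume(const Eigen::AlignedBox3d &b) { return b.squaredExteriorDistance(query); }
  Scalar minimumOnObject(const Eigen::Vector3d &p) { return (p - query).squaredNorm(); }
};

int main()
{
  std::vector<Eigen::Vector3d> pts(100);
  for (int i = 0; i < 100; ++i) pts[i] = Eigen::Vector3d::Random();

  // Construct the hierarchy; bounding boxes are computed via bounding_box().
  Eigen::KdBVH<double, 3, Eigen::Vector3d> tree(pts.begin(), pts.end());

  SquaredDistMinimizer m;
  m.query = Eigen::Vector3d(0.5, 0.5, 0.5);
  std::cout << "squared distance to nearest point: " << Eigen::BVMinimize(tree, m) << std::endl;
  return 0;
}

An Intersector providing the members listed earlier can be plugged into BVIntersect in the same way.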
struct VectorComparator //compares vectors, or, more specificall, VIPairs along a particular dimension - { - VectorComparator(int inDim) : dim(inDim) {} - inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; } - int dim; - }; - - //Build the part of the tree between objects[from] and objects[to] (not including objects[to]). - //This routine partitions the objCenters in [from, to) along the dimension dim, recursively constructs - //the two halves, and adds their parent node. TODO: a cache-friendlier layout - void build(VIPairList &objCenters, int from, int to, const VolumeList &objBoxes, int dim) - { - eigen_assert(to - from > 1); - if(to - from == 2) { - boxes.push_back(objBoxes[objCenters[from].second].merged(objBoxes[objCenters[from + 1].second])); - children.push_back(from + (int)objects.size() - 1); //there are objects.size() - 1 tree nodes - children.push_back(from + (int)objects.size()); - } - else if(to - from == 3) { - int mid = from + 2; - std::nth_element(objCenters.begin() + from, objCenters.begin() + mid, - objCenters.begin() + to, VectorComparator(dim)); //partition - build(objCenters, from, mid, objBoxes, (dim + 1) % Dim); - int idx1 = (int)boxes.size() - 1; - boxes.push_back(boxes[idx1].merged(objBoxes[objCenters[mid].second])); - children.push_back(idx1); - children.push_back(mid + (int)objects.size() - 1); - } - else { - int mid = from + (to - from) / 2; - nth_element(objCenters.begin() + from, objCenters.begin() + mid, - objCenters.begin() + to, VectorComparator(dim)); //partition - build(objCenters, from, mid, objBoxes, (dim + 1) % Dim); - int idx1 = (int)boxes.size() - 1; - build(objCenters, mid, to, objBoxes, (dim + 1) % Dim); - int idx2 = (int)boxes.size() - 1; - boxes.push_back(boxes[idx1].merged(boxes[idx2])); - children.push_back(idx1); - children.push_back(idx2); - } - } - - std::vector<int> children; //children of x are children[2x] and children[2x+1], indices bigger than boxes.size() index into objects. - VolumeList boxes; - ObjectList objects; -}; - -} // end namespace Eigen - -#endif //KDBVH_H_INCLUDED diff --git a/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h deleted file mode 100644 index 866a8a4..0000000 --- a/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +++ /dev/null @@ -1,805 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 David Harmon <dharmon@gmail.com> -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. 
If not, see <http://www.gnu.org/licenses/>. - -#ifndef EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H -#define EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H - -#include <Eigen/Dense> - -namespace Eigen { - -namespace internal { - template<typename Scalar, typename RealScalar> struct arpack_wrapper; - template<typename MatrixSolver, typename MatrixType, typename Scalar, bool BisSPD> struct OP; -} - - - -template<typename MatrixType, typename MatrixSolver=SimplicialLLT<MatrixType>, bool BisSPD=false> -class ArpackGeneralizedSelfAdjointEigenSolver -{ -public: - //typedef typename MatrixSolver::MatrixType MatrixType; - - /** \brief Scalar type for matrices of type \p MatrixType. */ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - - /** \brief Real scalar type for \p MatrixType. - * - * This is just \c Scalar if #Scalar is real (e.g., \c float or - * \c Scalar), and the type of the real part of \c Scalar if #Scalar is - * complex. - */ - typedef typename NumTraits<Scalar>::Real RealScalar; - - /** \brief Type for vector of eigenvalues as returned by eigenvalues(). - * - * This is a column vector with entries of type #RealScalar. - * The length of the vector is the size of \p nbrEigenvalues. - */ - typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVectorType; - - /** \brief Default constructor. - * - * The default constructor is for cases in which the user intends to - * perform decompositions via compute(). - * - */ - ArpackGeneralizedSelfAdjointEigenSolver() - : m_eivec(), - m_eivalues(), - m_isInitialized(false), - m_eigenvectorsOk(false), - m_nbrConverged(0), - m_nbrIterations(0) - { } - - /** \brief Constructor; computes generalized eigenvalues of given matrix with respect to another matrix. - * - * \param[in] A Self-adjoint matrix whose eigenvalues / eigenvectors will - * computed. By default, the upper triangular part is used, but can be changed - * through the template parameter. - * \param[in] B Self-adjoint matrix for the generalized eigenvalue problem. - * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. - * Must be less than the size of the input matrix, or an error is returned. - * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with - * respective meanings to find the largest magnitude , smallest magnitude, - * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this - * value can contain floating point value in string form, in which case the - * eigenvalues closest to this value will be found. - * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. - * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which - * means machine precision. - * - * This constructor calls compute(const MatrixType&, const MatrixType&, Index, string, int, RealScalar) - * to compute the eigenvalues of the matrix \p A with respect to \p B. The eigenvectors are computed if - * \p options equals #ComputeEigenvectors. - * - */ - ArpackGeneralizedSelfAdjointEigenSolver(const MatrixType& A, const MatrixType& B, - Index nbrEigenvalues, std::string eigs_sigma="LM", - int options=ComputeEigenvectors, RealScalar tol=0.0) - : m_eivec(), - m_eivalues(), - m_isInitialized(false), - m_eigenvectorsOk(false), - m_nbrConverged(0), - m_nbrIterations(0) - { - compute(A, B, nbrEigenvalues, eigs_sigma, options, tol); - } - - /** \brief Constructor; computes eigenvalues of given matrix. 
- * - * \param[in] A Self-adjoint matrix whose eigenvalues / eigenvectors will - * computed. By default, the upper triangular part is used, but can be changed - * through the template parameter. - * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. - * Must be less than the size of the input matrix, or an error is returned. - * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with - * respective meanings to find the largest magnitude , smallest magnitude, - * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this - * value can contain floating point value in string form, in which case the - * eigenvalues closest to this value will be found. - * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. - * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which - * means machine precision. - * - * This constructor calls compute(const MatrixType&, Index, string, int, RealScalar) - * to compute the eigenvalues of the matrix \p A. The eigenvectors are computed if - * \p options equals #ComputeEigenvectors. - * - */ - - ArpackGeneralizedSelfAdjointEigenSolver(const MatrixType& A, - Index nbrEigenvalues, std::string eigs_sigma="LM", - int options=ComputeEigenvectors, RealScalar tol=0.0) - : m_eivec(), - m_eivalues(), - m_isInitialized(false), - m_eigenvectorsOk(false), - m_nbrConverged(0), - m_nbrIterations(0) - { - compute(A, nbrEigenvalues, eigs_sigma, options, tol); - } - - - /** \brief Computes generalized eigenvalues / eigenvectors of given matrix using the external ARPACK library. - * - * \param[in] A Selfadjoint matrix whose eigendecomposition is to be computed. - * \param[in] B Selfadjoint matrix for generalized eigenvalues. - * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. - * Must be less than the size of the input matrix, or an error is returned. - * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with - * respective meanings to find the largest magnitude , smallest magnitude, - * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this - * value can contain floating point value in string form, in which case the - * eigenvalues closest to this value will be found. - * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. - * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which - * means machine precision. - * - * \returns Reference to \c *this - * - * This function computes the generalized eigenvalues of \p A with respect to \p B using ARPACK. The eigenvalues() - * function can be used to retrieve them. If \p options equals #ComputeEigenvectors, - * then the eigenvectors are also computed and can be retrieved by - * calling eigenvectors(). - * - */ - ArpackGeneralizedSelfAdjointEigenSolver& compute(const MatrixType& A, const MatrixType& B, - Index nbrEigenvalues, std::string eigs_sigma="LM", - int options=ComputeEigenvectors, RealScalar tol=0.0); - - /** \brief Computes eigenvalues / eigenvectors of given matrix using the external ARPACK library. - * - * \param[in] A Selfadjoint matrix whose eigendecomposition is to be computed. - * \param[in] nbrEigenvalues The number of eigenvalues / eigenvectors to compute. - * Must be less than the size of the input matrix, or an error is returned. 
- * \param[in] eigs_sigma String containing either "LM", "SM", "LA", or "SA", with - * respective meanings to find the largest magnitude , smallest magnitude, - * largest algebraic, or smallest algebraic eigenvalues. Alternatively, this - * value can contain floating point value in string form, in which case the - * eigenvalues closest to this value will be found. - * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly. - * \param[in] tol What tolerance to find the eigenvalues to. Default is 0, which - * means machine precision. - * - * \returns Reference to \c *this - * - * This function computes the eigenvalues of \p A using ARPACK. The eigenvalues() - * function can be used to retrieve them. If \p options equals #ComputeEigenvectors, - * then the eigenvectors are also computed and can be retrieved by - * calling eigenvectors(). - * - */ - ArpackGeneralizedSelfAdjointEigenSolver& compute(const MatrixType& A, - Index nbrEigenvalues, std::string eigs_sigma="LM", - int options=ComputeEigenvectors, RealScalar tol=0.0); - - - /** \brief Returns the eigenvectors of given matrix. - * - * \returns A const reference to the matrix whose columns are the eigenvectors. - * - * \pre The eigenvectors have been computed before. - * - * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding - * to eigenvalue number \f$ k \f$ as returned by eigenvalues(). The - * eigenvectors are normalized to have (Euclidean) norm equal to one. If - * this object was used to solve the eigenproblem for the selfadjoint - * matrix \f$ A \f$, then the matrix returned by this function is the - * matrix \f$ V \f$ in the eigendecomposition \f$ A V = D V \f$. - * For the generalized eigenproblem, the matrix returned is the solution \f$ A V = D B V \f$ - * - * Example: \include SelfAdjointEigenSolver_eigenvectors.cpp - * Output: \verbinclude SelfAdjointEigenSolver_eigenvectors.out - * - * \sa eigenvalues() - */ - const Matrix<Scalar, Dynamic, Dynamic>& eigenvectors() const - { - eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); - eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); - return m_eivec; - } - - /** \brief Returns the eigenvalues of given matrix. - * - * \returns A const reference to the column vector containing the eigenvalues. - * - * \pre The eigenvalues have been computed before. - * - * The eigenvalues are repeated according to their algebraic multiplicity, - * so there are as many eigenvalues as rows in the matrix. The eigenvalues - * are sorted in increasing order. - * - * Example: \include SelfAdjointEigenSolver_eigenvalues.cpp - * Output: \verbinclude SelfAdjointEigenSolver_eigenvalues.out - * - * \sa eigenvectors(), MatrixBase::eigenvalues() - */ - const Matrix<Scalar, Dynamic, 1>& eigenvalues() const - { - eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); - return m_eivalues; - } - - /** \brief Computes the positive-definite square root of the matrix. - * - * \returns the positive-definite square root of the matrix - * - * \pre The eigenvalues and eigenvectors of a positive-definite matrix - * have been computed before. - * - * The square root of a positive-definite matrix \f$ A \f$ is the - * positive-definite matrix whose square equals \f$ A \f$. This function - * uses the eigendecomposition \f$ A = V D V^{-1} \f$ to compute the - * square root as \f$ A^{1/2} = V D^{1/2} V^{-1} \f$. 
- * - * Example: \include SelfAdjointEigenSolver_operatorSqrt.cpp - * Output: \verbinclude SelfAdjointEigenSolver_operatorSqrt.out - * - * \sa operatorInverseSqrt(), - * \ref MatrixFunctions_Module "MatrixFunctions Module" - */ - Matrix<Scalar, Dynamic, Dynamic> operatorSqrt() const - { - eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized."); - eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); - return m_eivec * m_eivalues.cwiseSqrt().asDiagonal() * m_eivec.adjoint(); - } - - /** \brief Computes the inverse square root of the matrix. - * - * \returns the inverse positive-definite square root of the matrix - * - * \pre The eigenvalues and eigenvectors of a positive-definite matrix - * have been computed before. - * - * This function uses the eigendecomposition \f$ A = V D V^{-1} \f$ to - * compute the inverse square root as \f$ V D^{-1/2} V^{-1} \f$. This is - * cheaper than first computing the square root with operatorSqrt() and - * then its inverse with MatrixBase::inverse(). - * - * Example: \include SelfAdjointEigenSolver_operatorInverseSqrt.cpp - * Output: \verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out - * - * \sa operatorSqrt(), MatrixBase::inverse(), - * \ref MatrixFunctions_Module "MatrixFunctions Module" - */ - Matrix<Scalar, Dynamic, Dynamic> operatorInverseSqrt() const - { - eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized."); - eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues."); - return m_eivec * m_eivalues.cwiseInverse().cwiseSqrt().asDiagonal() * m_eivec.adjoint(); - } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, \c NoConvergence otherwise. 
- */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "ArpackGeneralizedSelfAdjointEigenSolver is not initialized."); - return m_info; - } - - size_t getNbrConvergedEigenValues() const - { return m_nbrConverged; } - - size_t getNbrIterations() const - { return m_nbrIterations; } - -protected: - Matrix<Scalar, Dynamic, Dynamic> m_eivec; - Matrix<Scalar, Dynamic, 1> m_eivalues; - ComputationInfo m_info; - bool m_isInitialized; - bool m_eigenvectorsOk; - - size_t m_nbrConverged; - size_t m_nbrIterations; -}; - - - - - -template<typename MatrixType, typename MatrixSolver, bool BisSPD> -ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD>& - ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD> -::compute(const MatrixType& A, Index nbrEigenvalues, - std::string eigs_sigma, int options, RealScalar tol) -{ - MatrixType B(0,0); - compute(A, B, nbrEigenvalues, eigs_sigma, options, tol); - - return *this; -} - - -template<typename MatrixType, typename MatrixSolver, bool BisSPD> -ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD>& - ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD> -::compute(const MatrixType& A, const MatrixType& B, Index nbrEigenvalues, - std::string eigs_sigma, int options, RealScalar tol) -{ - eigen_assert(A.cols() == A.rows()); - eigen_assert(B.cols() == B.rows()); - eigen_assert(B.rows() == 0 || A.cols() == B.rows()); - eigen_assert((options &~ (EigVecMask | GenEigMask)) == 0 - && (options & EigVecMask) != EigVecMask - && "invalid option parameter"); - - bool isBempty = (B.rows() == 0) || (B.cols() == 0); - - // For clarity, all parameters match their ARPACK name - // - // Always 0 on the first call - // - int ido = 0; - - int n = (int)A.cols(); - - // User options: "LA", "SA", "SM", "LM", "BE" - // - char whch[3] = "LM"; - - // Specifies the shift if iparam[6] = { 3, 4, 5 }, not used if iparam[6] = { 1, 2 } - // - RealScalar sigma = 0.0; - - if (eigs_sigma.length() >= 2 && isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1])) - { - eigs_sigma[0] = toupper(eigs_sigma[0]); - eigs_sigma[1] = toupper(eigs_sigma[1]); - - // In the following special case we're going to invert the problem, since solving - // for larger magnitude is much much faster - // i.e., if 'SM' is specified, we're going to really use 'LM', the default - // - if (eigs_sigma.substr(0,2) != "SM") - { - whch[0] = eigs_sigma[0]; - whch[1] = eigs_sigma[1]; - } - } - else - { - eigen_assert(false && "Specifying clustered eigenvalues is not yet supported!"); - - // If it's not scalar values, then the user may be explicitly - // specifying the sigma value to cluster the evs around - // - sigma = atof(eigs_sigma.c_str()); - - // If atof fails, it returns 0.0, which is a fine default - // - } - - // "I" means normal eigenvalue problem, "G" means generalized - // - char bmat[2] = "I"; - if (eigs_sigma.substr(0,2) == "SM" || !(isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1])) || (!isBempty && !BisSPD)) - bmat[0] = 'G'; - - // Now we determine the mode to use - // - int mode = (bmat[0] == 'G') + 1; - if (eigs_sigma.substr(0,2) == "SM" || !(isalpha(eigs_sigma[0]) && isalpha(eigs_sigma[1]))) - { - // We're going to use shift-and-invert mode, and basically find - // the largest eigenvalues of the inverse operator - // - mode = 3; - } - - // The user-specified number of eigenvalues/vectors to compute - // - int nev = (int)nbrEigenvalues; - - // Allocate space for ARPACK to store the residual - // - Scalar *resid = new 
Scalar[n]; - - // Number of Lanczos vectors, must satisfy nev < ncv <= n - // Note that this indicates that nev != n, and we cannot compute - // all eigenvalues of a mtrix - // - int ncv = std::min(std::max(2*nev, 20), n); - - // The working n x ncv matrix, also store the final eigenvectors (if computed) - // - Scalar *v = new Scalar[n*ncv]; - int ldv = n; - - // Working space - // - Scalar *workd = new Scalar[3*n]; - int lworkl = ncv*ncv+8*ncv; // Must be at least this length - Scalar *workl = new Scalar[lworkl]; - - int *iparam= new int[11]; - iparam[0] = 1; // 1 means we let ARPACK perform the shifts, 0 means we'd have to do it - iparam[2] = std::max(300, (int)std::ceil(2*n/std::max(ncv,1))); - iparam[6] = mode; // The mode, 1 is standard ev problem, 2 for generalized ev, 3 for shift-and-invert - - // Used during reverse communicate to notify where arrays start - // - int *ipntr = new int[11]; - - // Error codes are returned in here, initial value of 0 indicates a random initial - // residual vector is used, any other values means resid contains the initial residual - // vector, possibly from a previous run - // - int info = 0; - - Scalar scale = 1.0; - //if (!isBempty) - //{ - //Scalar scale = B.norm() / std::sqrt(n); - //scale = std::pow(2, std::floor(std::log(scale+1))); - ////M /= scale; - //for (size_t i=0; i<(size_t)B.outerSize(); i++) - // for (typename MatrixType::InnerIterator it(B, i); it; ++it) - // it.valueRef() /= scale; - //} - - MatrixSolver OP; - if (mode == 1 || mode == 2) - { - if (!isBempty) - OP.compute(B); - } - else if (mode == 3) - { - if (sigma == 0.0) - { - OP.compute(A); - } - else - { - // Note: We will never enter here because sigma must be 0.0 - // - if (isBempty) - { - MatrixType AminusSigmaB(A); - for (Index i=0; i<A.rows(); ++i) - AminusSigmaB.coeffRef(i,i) -= sigma; - - OP.compute(AminusSigmaB); - } - else - { - MatrixType AminusSigmaB = A - sigma * B; - OP.compute(AminusSigmaB); - } - } - } - - if (!(mode == 1 && isBempty) && !(mode == 2 && isBempty) && OP.info() != Success) - std::cout << "Error factoring matrix" << std::endl; - - do - { - internal::arpack_wrapper<Scalar, RealScalar>::saupd(&ido, bmat, &n, whch, &nev, &tol, resid, - &ncv, v, &ldv, iparam, ipntr, workd, workl, - &lworkl, &info); - - if (ido == -1 || ido == 1) - { - Scalar *in = workd + ipntr[0] - 1; - Scalar *out = workd + ipntr[1] - 1; - - if (ido == 1 && mode != 2) - { - Scalar *out2 = workd + ipntr[2] - 1; - if (isBempty || mode == 1) - Matrix<Scalar, Dynamic, 1>::Map(out2, n) = Matrix<Scalar, Dynamic, 1>::Map(in, n); - else - Matrix<Scalar, Dynamic, 1>::Map(out2, n) = B * Matrix<Scalar, Dynamic, 1>::Map(in, n); - - in = workd + ipntr[2] - 1; - } - - if (mode == 1) - { - if (isBempty) - { - // OP = A - // - Matrix<Scalar, Dynamic, 1>::Map(out, n) = A * Matrix<Scalar, Dynamic, 1>::Map(in, n); - } - else - { - // OP = L^{-1}AL^{-T} - // - internal::OP<MatrixSolver, MatrixType, Scalar, BisSPD>::applyOP(OP, A, n, in, out); - } - } - else if (mode == 2) - { - if (ido == 1) - Matrix<Scalar, Dynamic, 1>::Map(in, n) = A * Matrix<Scalar, Dynamic, 1>::Map(in, n); - - // OP = B^{-1} A - // - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.solve(Matrix<Scalar, Dynamic, 1>::Map(in, n)); - } - else if (mode == 3) - { - // OP = (A-\sigmaB)B (\sigma could be 0, and B could be I) - // The B * in is already computed and stored at in if ido == 1 - // - if (ido == 1 || isBempty) - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.solve(Matrix<Scalar, Dynamic, 1>::Map(in, n)); - else - Matrix<Scalar, Dynamic, 
1>::Map(out, n) = OP.solve(B * Matrix<Scalar, Dynamic, 1>::Map(in, n)); - } - } - else if (ido == 2) - { - Scalar *in = workd + ipntr[0] - 1; - Scalar *out = workd + ipntr[1] - 1; - - if (isBempty || mode == 1) - Matrix<Scalar, Dynamic, 1>::Map(out, n) = Matrix<Scalar, Dynamic, 1>::Map(in, n); - else - Matrix<Scalar, Dynamic, 1>::Map(out, n) = B * Matrix<Scalar, Dynamic, 1>::Map(in, n); - } - } while (ido != 99); - - if (info == 1) - m_info = NoConvergence; - else if (info == 3) - m_info = NumericalIssue; - else if (info < 0) - m_info = InvalidInput; - else if (info != 0) - eigen_assert(false && "Unknown ARPACK return value!"); - else - { - // Do we compute eigenvectors or not? - // - int rvec = (options & ComputeEigenvectors) == ComputeEigenvectors; - - // "A" means "All", use "S" to choose specific eigenvalues (not yet supported in ARPACK)) - // - char howmny[2] = "A"; - - // if howmny == "S", specifies the eigenvalues to compute (not implemented in ARPACK) - // - int *select = new int[ncv]; - - // Final eigenvalues - // - m_eivalues.resize(nev, 1); - - internal::arpack_wrapper<Scalar, RealScalar>::seupd(&rvec, howmny, select, m_eivalues.data(), v, &ldv, - &sigma, bmat, &n, whch, &nev, &tol, resid, &ncv, - v, &ldv, iparam, ipntr, workd, workl, &lworkl, &info); - - if (info == -14) - m_info = NoConvergence; - else if (info != 0) - m_info = InvalidInput; - else - { - if (rvec) - { - m_eivec.resize(A.rows(), nev); - for (int i=0; i<nev; i++) - for (int j=0; j<n; j++) - m_eivec(j,i) = v[i*n+j] / scale; - - if (mode == 1 && !isBempty && BisSPD) - internal::OP<MatrixSolver, MatrixType, Scalar, BisSPD>::project(OP, n, nev, m_eivec.data()); - - m_eigenvectorsOk = true; - } - - m_nbrIterations = iparam[2]; - m_nbrConverged = iparam[4]; - - m_info = Success; - } - - delete[] select; - } - - delete[] v; - delete[] iparam; - delete[] ipntr; - delete[] workd; - delete[] workl; - delete[] resid; - - m_isInitialized = true; - - return *this; -} - - -// Single precision -// -extern "C" void ssaupd_(int *ido, char *bmat, int *n, char *which, - int *nev, float *tol, float *resid, int *ncv, - float *v, int *ldv, int *iparam, int *ipntr, - float *workd, float *workl, int *lworkl, - int *info); - -extern "C" void sseupd_(int *rvec, char *All, int *select, float *d, - float *z, int *ldz, float *sigma, - char *bmat, int *n, char *which, int *nev, - float *tol, float *resid, int *ncv, float *v, - int *ldv, int *iparam, int *ipntr, float *workd, - float *workl, int *lworkl, int *ierr); - -// Double precision -// -extern "C" void dsaupd_(int *ido, char *bmat, int *n, char *which, - int *nev, double *tol, double *resid, int *ncv, - double *v, int *ldv, int *iparam, int *ipntr, - double *workd, double *workl, int *lworkl, - int *info); - -extern "C" void dseupd_(int *rvec, char *All, int *select, double *d, - double *z, int *ldz, double *sigma, - char *bmat, int *n, char *which, int *nev, - double *tol, double *resid, int *ncv, double *v, - int *ldv, int *iparam, int *ipntr, double *workd, - double *workl, int *lworkl, int *ierr); - - -namespace internal { - -template<typename Scalar, typename RealScalar> struct arpack_wrapper -{ - static inline void saupd(int *ido, char *bmat, int *n, char *which, - int *nev, RealScalar *tol, Scalar *resid, int *ncv, - Scalar *v, int *ldv, int *iparam, int *ipntr, - Scalar *workd, Scalar *workl, int *lworkl, int *info) - { - EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL) - } - - static inline void seupd(int *rvec, char *All, int *select, Scalar *d, - 
Scalar *z, int *ldz, RealScalar *sigma, - char *bmat, int *n, char *which, int *nev, - RealScalar *tol, Scalar *resid, int *ncv, Scalar *v, - int *ldv, int *iparam, int *ipntr, Scalar *workd, - Scalar *workl, int *lworkl, int *ierr) - { - EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL) - } -}; - -template <> struct arpack_wrapper<float, float> -{ - static inline void saupd(int *ido, char *bmat, int *n, char *which, - int *nev, float *tol, float *resid, int *ncv, - float *v, int *ldv, int *iparam, int *ipntr, - float *workd, float *workl, int *lworkl, int *info) - { - ssaupd_(ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info); - } - - static inline void seupd(int *rvec, char *All, int *select, float *d, - float *z, int *ldz, float *sigma, - char *bmat, int *n, char *which, int *nev, - float *tol, float *resid, int *ncv, float *v, - int *ldv, int *iparam, int *ipntr, float *workd, - float *workl, int *lworkl, int *ierr) - { - sseupd_(rvec, All, select, d, z, ldz, sigma, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, - workd, workl, lworkl, ierr); - } -}; - -template <> struct arpack_wrapper<double, double> -{ - static inline void saupd(int *ido, char *bmat, int *n, char *which, - int *nev, double *tol, double *resid, int *ncv, - double *v, int *ldv, int *iparam, int *ipntr, - double *workd, double *workl, int *lworkl, int *info) - { - dsaupd_(ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info); - } - - static inline void seupd(int *rvec, char *All, int *select, double *d, - double *z, int *ldz, double *sigma, - char *bmat, int *n, char *which, int *nev, - double *tol, double *resid, int *ncv, double *v, - int *ldv, int *iparam, int *ipntr, double *workd, - double *workl, int *lworkl, int *ierr) - { - dseupd_(rvec, All, select, d, v, ldv, sigma, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, - workd, workl, lworkl, ierr); - } -}; - - -template<typename MatrixSolver, typename MatrixType, typename Scalar, bool BisSPD> -struct OP -{ - static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out); - static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs); -}; - -template<typename MatrixSolver, typename MatrixType, typename Scalar> -struct OP<MatrixSolver, MatrixType, Scalar, true> -{ - static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out) -{ - // OP = L^{-1} A L^{-T} (B = LL^T) - // - // First solve L^T out = in - // - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.matrixU().solve(Matrix<Scalar, Dynamic, 1>::Map(in, n)); - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.permutationPinv() * Matrix<Scalar, Dynamic, 1>::Map(out, n); - - // Then compute out = A out - // - Matrix<Scalar, Dynamic, 1>::Map(out, n) = A * Matrix<Scalar, Dynamic, 1>::Map(out, n); - - // Then solve L out = out - // - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.permutationP() * Matrix<Scalar, Dynamic, 1>::Map(out, n); - Matrix<Scalar, Dynamic, 1>::Map(out, n) = OP.matrixL().solve(Matrix<Scalar, Dynamic, 1>::Map(out, n)); -} - - static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs) -{ - // Solve L^T out = in - // - Matrix<Scalar, Dynamic, Dynamic>::Map(vecs, n, k) = OP.matrixU().solve(Matrix<Scalar, Dynamic, Dynamic>::Map(vecs, n, k)); - Matrix<Scalar, Dynamic, Dynamic>::Map(vecs, n, k) = OP.permutationPinv() * Matrix<Scalar, Dynamic, Dynamic>::Map(vecs, n, k); -} - -}; - 
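To show how the public interface documented above is typically driven, here is a minimal sketch; it assumes ARPACK is available at link time (e.g. -larpack), and the tridiagonal test matrix and parameter choices are illustrative only, not prescribed by the library.

#include <iostream>
#include <vector>
#include <Eigen/Sparse>
#include <unsupported/Eigen/ArpackSupport>

int main()
{
  using namespace Eigen;
  const int n = 100;

  // Assemble a sparse symmetric positive-definite test matrix
  // (a 1-D Laplacian with Dirichlet boundaries).
  SparseMatrix<double> A(n, n);
  std::vector<Triplet<double> > trips;
  for (int i = 0; i < n; ++i) {
    trips.push_back(Triplet<double>(i, i, 2.0));
    if (i + 1 < n) {
      trips.push_back(Triplet<double>(i, i + 1, -1.0));
      trips.push_back(Triplet<double>(i + 1, i, -1.0));
    }
  }
  A.setFromTriplets(trips.begin(), trips.end());

  // Request the 4 eigenvalues of smallest magnitude; "SM" is handled via
  // shift-and-invert internally, so the default SimplicialLLT factorizes A.
  ArpackGeneralizedSelfAdjointEigenSolver<SparseMatrix<double> > solver(A, 4, "SM");

  if (solver.info() == Success)
    std::cout << solver.eigenvalues().transpose() << std::endl;
  return solver.info() == Success ? 0 : 1;
}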
-template<typename MatrixSolver, typename MatrixType, typename Scalar> -struct OP<MatrixSolver, MatrixType, Scalar, false> -{ - static inline void applyOP(MatrixSolver &OP, const MatrixType &A, int n, Scalar *in, Scalar *out) -{ - eigen_assert(false && "Should never be in here..."); -} - - static inline void project(MatrixSolver &OP, int n, int k, Scalar *vecs) -{ - eigen_assert(false && "Should never be in here..."); -} - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ARPACKSELFADJOINTEIGENSOLVER_H - diff --git a/eigen/unsupported/Eigen/src/EulerAngles/CMakeLists.txt b/eigen/unsupported/Eigen/src/EulerAngles/CMakeLists.txt deleted file mode 100644 index 40af550..0000000 --- a/eigen/unsupported/Eigen/src/EulerAngles/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_EulerAngles_SRCS "*.h") - -INSTALL(FILES - ${Eigen_EulerAngles_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/EulerAngles COMPONENT Devel - ) diff --git a/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h b/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h deleted file mode 100644 index 13a0da1..0000000 --- a/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h +++ /dev/null @@ -1,386 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EULERANGLESCLASS_H// TODO: Fix previous "EIGEN_EULERANGLES_H" definition? -#define EIGEN_EULERANGLESCLASS_H - -namespace Eigen -{ - /*template<typename Other, - int OtherRows=Other::RowsAtCompileTime, - int OtherCols=Other::ColsAtCompileTime> - struct ei_eulerangles_assign_impl;*/ - - /** \class EulerAngles - * - * \ingroup EulerAngles_Module - * - * \brief Represents a rotation in a 3 dimensional space as three Euler angles. - * - * Euler rotation is a set of three rotation of three angles over three fixed axes, defined by the EulerSystem given as a template parameter. - * - * Here is how intrinsic Euler angles works: - * - first, rotate the axes system over the alpha axis in angle alpha - * - then, rotate the axes system over the beta axis(which was rotated in the first stage) in angle beta - * - then, rotate the axes system over the gamma axis(which was rotated in the two stages above) in angle gamma - * - * \note This class support only intrinsic Euler angles for simplicity, - * see EulerSystem how to easily overcome this for extrinsic systems. - * - * ### Rotation representation and conversions ### - * - * It has been proved(see Wikipedia link below) that every rotation can be represented - * by Euler angles, but there is no singular representation (e.g. unlike rotation matrices). - * Therefore, you can convert from Eigen rotation and to them - * (including rotation matrices, which is not called "rotations" by Eigen design). - * - * Euler angles usually used for: - * - convenient human representation of rotation, especially in interactive GUI. - * - gimbal systems and robotics - * - efficient encoding(i.e. 3 floats only) of rotation for network protocols. - * - * However, Euler angles are slow comparing to quaternion or matrices, - * because their unnatural math definition, although it's simple for human. 
- * To overcome this, this class provides easy movement from the math-friendly representation - * to the human-friendly representation, and vice versa. - * - * All the user needs to do is a safe, simple C++ type conversion, - * and this class takes care of the math. - * Additionally, some axis-related computation is done at compile time. - * - * #### Euler angles ranges in conversions #### - * - * When converting a rotation to Euler angles, there are several ways you can guarantee - * the ranges of the resulting angles. - * - * #### implicit ranges #### - * When using implicit ranges, all angles are guaranteed to be in the range [-PI, +PI], - * unless you convert from some other Euler angles. - * In this case, the range is __undefined__ (might be even less than -PI or greater than +2*PI). - * \sa EulerAngles(const MatrixBase<Derived>&) - * \sa EulerAngles(const RotationBase<Derived, 3>&) - * - * #### explicit ranges #### - * When using explicit ranges, all angles are guaranteed to be in the range you choose. - * Through the range Boolean parameters, you state whether you prefer the positive range or not: - * - _true_ - force the range between [0, +2*PI] - * - _false_ - force the range between [-PI, +PI] - * - * ##### compile time ranges ##### - * Use these when the ranges are known at compile time and you prefer to - * pass them as template parameters (e.g. for performance). - * \sa FromRotation() - * - * ##### run-time ranges ##### - * Run-time ranges are also supported. - * \sa EulerAngles(const MatrixBase<Derived>&, bool, bool, bool) - * \sa EulerAngles(const RotationBase<Derived, 3>&, bool, bool, bool) - * - * ### Convenient user typedefs ### - * - * Convenient typedefs for EulerAngles exist for float and double scalars, - * in the form EulerAngles{A}{B}{C}{scalar}, - * e.g. \ref EulerAnglesXYZd, \ref EulerAnglesZYZf. - * - * Convenient typedefs exist only for Euler systems over positive axes {+x,+y,+z}. - * If you need negative axes {-x,-y,-z}, it is recommended to create your own typedef with - * a name that represents what you need. - * - * ### Example ### - * - * \include EulerAngles.cpp - * Output: \verbinclude EulerAngles.out - * - * ### Additional reading ### - * - * If you want a better idea of how Euler systems work in Eigen, see EulerSystem. - * - * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles - * - * \tparam _Scalar the scalar type, i.e., the type of the angles. - * - * \tparam _System the EulerSystem to use, which represents the axes of rotation. - */ - template <typename _Scalar, class _System> - class EulerAngles : public RotationBase<EulerAngles<_Scalar, _System>, 3> - { - public: - /** the scalar type of the angles */ - typedef _Scalar Scalar; - - /** the EulerSystem to use, which represents the axes of rotation. */ - typedef _System System; - - typedef Matrix<Scalar,3,3> Matrix3; /*!< the equivalent rotation matrix type */ - typedef Matrix<Scalar,3,1> Vector3; /*!< the equivalent 3-dimensional vector type */ - typedef Quaternion<Scalar> QuaternionType; /*!< the equivalent quaternion type */ - typedef AngleAxis<Scalar> AngleAxisType; /*!< the equivalent angle-axis type */ - - /** \returns the axis vector of the first (alpha) rotation */ - static Vector3 AlphaAxisVector() { - const Vector3& u = Vector3::Unit(System::AlphaAxisAbs - 1); - return System::IsAlphaOpposite ?
-u : u; - } - - /** \returns the axis vector of the second (beta) rotation */ - static Vector3 BetaAxisVector() { - const Vector3& u = Vector3::Unit(System::BetaAxisAbs - 1); - return System::IsBetaOpposite ? -u : u; - } - - /** \returns the axis vector of the third (gamma) rotation */ - static Vector3 GammaAxisVector() { - const Vector3& u = Vector3::Unit(System::GammaAxisAbs - 1); - return System::IsGammaOpposite ? -u : u; - } - - private: - Vector3 m_angles; - - public: - /** Default constructor without initialization. */ - EulerAngles() {} - /** Constructs and initialize Euler angles(\p alpha, \p beta, \p gamma). */ - EulerAngles(const Scalar& alpha, const Scalar& beta, const Scalar& gamma) : - m_angles(alpha, beta, gamma) {} - - /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m. - * - * \note All angles will be in the range [-PI, PI]. - */ - template<typename Derived> - EulerAngles(const MatrixBase<Derived>& m) { *this = m; } - - /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m, - * with options to choose for each angle the requested range. - * - * If positive range is true, then the specified angle will be in the range [0, +2*PI]. - * Otherwise, the specified angle will be in the range [-PI, +PI]. - * - * \param m The 3x3 rotation matrix to convert - * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - */ - template<typename Derived> - EulerAngles( - const MatrixBase<Derived>& m, - bool positiveRangeAlpha, - bool positiveRangeBeta, - bool positiveRangeGamma) { - - System::CalcEulerAngles(*this, m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma); - } - - /** Constructs and initialize Euler angles from a rotation \p rot. - * - * \note All angles will be in the range [-PI, PI], unless \p rot is an EulerAngles. - * If rot is an EulerAngles, expected EulerAngles range is __undefined__. - * (Use other functions here for enforcing range if this effect is desired) - */ - template<typename Derived> - EulerAngles(const RotationBase<Derived, 3>& rot) { *this = rot; } - - /** Constructs and initialize Euler angles from a rotation \p rot, - * with options to choose for each angle the requested range. - * - * If positive range is true, then the specified angle will be in the range [0, +2*PI]. - * Otherwise, the specified angle will be in the range [-PI, +PI]. - * - * \param rot The 3x3 rotation matrix to convert - * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - */ - template<typename Derived> - EulerAngles( - const RotationBase<Derived, 3>& rot, - bool positiveRangeAlpha, - bool positiveRangeBeta, - bool positiveRangeGamma) { - - System::CalcEulerAngles(*this, rot.toRotationMatrix(), positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma); - } - - /** \returns The angle values stored in a vector (alpha, beta, gamma). */ - const Vector3& angles() const { return m_angles; } - /** \returns A read-write reference to the angle values stored in a vector (alpha, beta, gamma). */ - Vector3& angles() { return m_angles; } - - /** \returns The value of the first angle. 
*/ - Scalar alpha() const { return m_angles[0]; } - /** \returns A read-write reference to the angle of the first angle. */ - Scalar& alpha() { return m_angles[0]; } - - /** \returns The value of the second angle. */ - Scalar beta() const { return m_angles[1]; } - /** \returns A read-write reference to the angle of the second angle. */ - Scalar& beta() { return m_angles[1]; } - - /** \returns The value of the third angle. */ - Scalar gamma() const { return m_angles[2]; } - /** \returns A read-write reference to the angle of the third angle. */ - Scalar& gamma() { return m_angles[2]; } - - /** \returns The Euler angles rotation inverse (which is as same as the negative), - * (-alpha, -beta, -gamma). - */ - EulerAngles inverse() const - { - EulerAngles res; - res.m_angles = -m_angles; - return res; - } - - /** \returns The Euler angles rotation negative (which is as same as the inverse), - * (-alpha, -beta, -gamma). - */ - EulerAngles operator -() const - { - return inverse(); - } - - /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m, - * with options to choose for each angle the requested range (__only in compile time__). - * - * If positive range is true, then the specified angle will be in the range [0, +2*PI]. - * Otherwise, the specified angle will be in the range [-PI, +PI]. - * - * \param m The 3x3 rotation matrix to convert - * \tparam positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \tparam positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \tparam positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - */ - template< - bool PositiveRangeAlpha, - bool PositiveRangeBeta, - bool PositiveRangeGamma, - typename Derived> - static EulerAngles FromRotation(const MatrixBase<Derived>& m) - { - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3) - - EulerAngles e; - System::template CalcEulerAngles< - PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma, _Scalar>(e, m); - return e; - } - - /** Constructs and initialize Euler angles from a rotation \p rot, - * with options to choose for each angle the requested range (__only in compile time__). - * - * If positive range is true, then the specified angle will be in the range [0, +2*PI]. - * Otherwise, the specified angle will be in the range [-PI, +PI]. - * - * \param rot The 3x3 rotation matrix to convert - * \tparam positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \tparam positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - * \tparam positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. - */ - template< - bool PositiveRangeAlpha, - bool PositiveRangeBeta, - bool PositiveRangeGamma, - typename Derived> - static EulerAngles FromRotation(const RotationBase<Derived, 3>& rot) - { - return FromRotation<PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma>(rot.toRotationMatrix()); - } - - /*EulerAngles& fromQuaternion(const QuaternionType& q) - { - // TODO: Implement it in a faster way for quaternions - // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/ - // we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below) - // Currently we compute all matrix cells from quaternion. 
- - // Special case only for ZYX - //Scalar y2 = q.y() * q.y(); - //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z()))); - //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x())); - //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2))); - }*/ - - /** Set \c *this from a rotation matrix(i.e. pure orthogonal matrix with determinant of +1). */ - template<typename Derived> - EulerAngles& operator=(const MatrixBase<Derived>& m) { - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3) - - System::CalcEulerAngles(*this, m); - return *this; - } - - // TODO: Assign and construct from another EulerAngles (with different system) - - /** Set \c *this from a rotation. */ - template<typename Derived> - EulerAngles& operator=(const RotationBase<Derived, 3>& rot) { - System::CalcEulerAngles(*this, rot.toRotationMatrix()); - return *this; - } - - // TODO: Support isApprox function - - /** \returns an equivalent 3x3 rotation matrix. */ - Matrix3 toRotationMatrix() const - { - return static_cast<QuaternionType>(*this).toRotationMatrix(); - } - - /** Convert the Euler angles to quaternion. */ - operator QuaternionType() const - { - return - AngleAxisType(alpha(), AlphaAxisVector()) * - AngleAxisType(beta(), BetaAxisVector()) * - AngleAxisType(gamma(), GammaAxisVector()); - } - - friend std::ostream& operator<<(std::ostream& s, const EulerAngles<Scalar, System>& eulerAngles) - { - s << eulerAngles.angles().transpose(); - return s; - } - }; - -#define EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(AXES, SCALAR_TYPE, SCALAR_POSTFIX) \ - /** \ingroup EulerAngles_Module */ \ - typedef EulerAngles<SCALAR_TYPE, EulerSystem##AXES> EulerAngles##AXES##SCALAR_POSTFIX; - -#define EIGEN_EULER_ANGLES_TYPEDEFS(SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYZ, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYX, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZY, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZX, SCALAR_TYPE, SCALAR_POSTFIX) \ - \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZX, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZY, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXY, SCALAR_TYPE, SCALAR_POSTFIX) \ - \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXY, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYX, SCALAR_TYPE, SCALAR_POSTFIX) \ - EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYZ, SCALAR_TYPE, SCALAR_POSTFIX) - -EIGEN_EULER_ANGLES_TYPEDEFS(float, f) -EIGEN_EULER_ANGLES_TYPEDEFS(double, d) - - namespace internal - { - template<typename _Scalar, class _System> - struct traits<EulerAngles<_Scalar, _System> > - { - typedef _Scalar Scalar; - }; - } - -} - -#endif // EIGEN_EULERANGLESCLASS_H diff --git a/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h deleted file mode 100644 index 98f9f64..0000000 --- a/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h +++ /dev/null @@ -1,326 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_EULERSYSTEM_H -#define EIGEN_EULERSYSTEM_H - -namespace Eigen -{ - // Forward declerations - template <typename _Scalar, class _System> - class EulerAngles; - - namespace internal - { - // TODO: Check if already exists on the rest API - template <int Num, bool IsPositive = (Num > 0)> - struct Abs - { - enum { value = Num }; - }; - - template <int Num> - struct Abs<Num, false> - { - enum { value = -Num }; - }; - - template <int Axis> - struct IsValidAxis - { - enum { value = Axis != 0 && Abs<Axis>::value <= 3 }; - }; - } - - #define EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1] - - /** \brief Representation of a fixed signed rotation axis for EulerSystem. - * - * \ingroup EulerAngles_Module - * - * Values here represent: - * - The axis of the rotation: X, Y or Z. - * - The sign (i.e. direction of the rotation along the axis): positive(+) or negative(-) - * - * Therefore, this could express all the axes {+X,+Y,+Z,-X,-Y,-Z} - * - * For positive axis, use +EULER_{axis}, and for negative axis use -EULER_{axis}. - */ - enum EulerAxis - { - EULER_X = 1, /*!< the X axis */ - EULER_Y = 2, /*!< the Y axis */ - EULER_Z = 3 /*!< the Z axis */ - }; - - /** \class EulerSystem - * - * \ingroup EulerAngles_Module - * - * \brief Represents a fixed Euler rotation system. - * - * This meta-class goal is to represent the Euler system in compilation time, for EulerAngles. - * - * You can use this class to get two things: - * - Build an Euler system, and then pass it as a template parameter to EulerAngles. - * - Query some compile time data about an Euler system. (e.g. Whether it's tait bryan) - * - * Euler rotation is a set of three rotation on fixed axes. (see \ref EulerAngles) - * This meta-class store constantly those signed axes. (see \ref EulerAxis) - * - * ### Types of Euler systems ### - * - * All and only valid 3 dimension Euler rotation over standard - * signed axes{+X,+Y,+Z,-X,-Y,-Z} are supported: - * - all axes X, Y, Z in each valid order (see below what order is valid) - * - rotation over the axis is supported both over the positive and negative directions. - * - both tait bryan and proper/classic Euler angles (i.e. the opposite). - * - * Since EulerSystem support both positive and negative directions, - * you may call this rotation distinction in other names: - * - _right handed_ or _left handed_ - * - _counterclockwise_ or _clockwise_ - * - * Notice all axed combination are valid, and would trigger a static assertion. - * Same unsigned axes can't be neighbors, e.g. {X,X,Y} is invalid. - * This yield two and only two classes: - * - _tait bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z} - * - _proper/classic Euler angles_ - The first and the third unsigned axes is equal, - * and the second is different, e.g. {X,Y,X} - * - * ### Intrinsic vs extrinsic Euler systems ### - * - * Only intrinsic Euler systems are supported for simplicity. - * If you want to use extrinsic Euler systems, - * just use the equal intrinsic opposite order for axes and angles. - * I.e axes (A,B,C) becomes (C,B,A), and angles (a,b,c) becomes (c,b,a). - * - * ### Convenient user typedefs ### - * - * Convenient typedefs for EulerSystem exist (only for positive axes Euler systems), - * in a form of EulerSystem{A}{B}{C}, e.g. \ref EulerSystemXYZ. 
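 * (Editorial sketch, not part of the deleted file: a minimal illustration of how such a system typedef is combined with EulerAngles, using only the API visible elsewhere in this diff.)
 * \code
 * // EulerSystemZYX is a Tait-Bryan system, commonly read as yaw-pitch-roll.
 * typedef Eigen::EulerAngles<double, Eigen::EulerSystemZYX> MyYawPitchRoll;  // same type as the EulerAnglesZYXd typedef
 * MyYawPitchRoll e;
 * e.alpha() = 0.1; e.beta() = 0.2; e.gamma() = 0.3;   // angles in radians
 * Eigen::Matrix3d rot = e.toRotationMatrix();
 * \endcode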
- * - * ### Additional reading ### - * - * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles - * - * \tparam _AlphaAxis the first fixed EulerAxis - * - * \tparam _BetaAxis the second fixed EulerAxis - * - * \tparam _GammaAxis the third fixed EulerAxis - */ - template <int _AlphaAxis, int _BetaAxis, int _GammaAxis> - class EulerSystem - { - public: - // It's defined this way and not as an enum, because - // an enum is not guaranteed to support negative numbers - - /** The first rotation axis */ - static const int AlphaAxis = _AlphaAxis; - - /** The second rotation axis */ - static const int BetaAxis = _BetaAxis; - - /** The third rotation axis */ - static const int GammaAxis = _GammaAxis; - - enum - { - AlphaAxisAbs = internal::Abs<AlphaAxis>::value, /*!< the first rotation axis unsigned */ - BetaAxisAbs = internal::Abs<BetaAxis>::value, /*!< the second rotation axis unsigned */ - GammaAxisAbs = internal::Abs<GammaAxis>::value, /*!< the third rotation axis unsigned */ - - IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< whether the alpha axis is negative */ - IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< whether the beta axis is negative */ - IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< whether the gamma axis is negative */ - - IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< whether the Euler system is odd */ - IsEven = IsOdd ? 0 : 1, /*!< whether the Euler system is even */ - - IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 1 : 0 /*!< whether the Euler system is Tait-Bryan */ - }; - - private: - - EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<AlphaAxis>::value, - ALPHA_AXIS_IS_INVALID); - - EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<BetaAxis>::value, - BETA_AXIS_IS_INVALID); - - EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<GammaAxis>::value, - GAMMA_AXIS_IS_INVALID); - - EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)AlphaAxisAbs != (unsigned)BetaAxisAbs, - ALPHA_AXIS_CANT_BE_EQUAL_TO_BETA_AXIS); - - EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)BetaAxisAbs != (unsigned)GammaAxisAbs, - BETA_AXIS_CANT_BE_EQUAL_TO_GAMMA_AXIS); - - enum - { - // I, J, K are the pivot index permutation of the rotation matrix that matches this Euler system. - // They are used in this class's converters. - // They are always different from each other, and their possible values are 0, 1, or 2. - I = AlphaAxisAbs - 1, - J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3, - K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3 - }; - - // TODO: Get @mat parameter in form that avoids double evaluation.
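      // Editorial example (derived from the definitions above, not in the deleted file):
      // for EulerSystemZYX we get AlphaAxisAbs=3, BetaAxisAbs=2, GammaAxisAbs=1, hence
      // IsOdd=1, IsTaitBryan=1 and the pivot permutation I=2, J=1, K=0; for
      // EulerSystemXYZ the permutation is the identity I=0, J=1, K=2 with IsOdd=0.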
- template <typename Derived> - static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/) - { - using std::atan2; - using std::sin; - using std::cos; - - typedef typename Derived::Scalar Scalar; - typedef Matrix<Scalar,2,1> Vector2; - - res[0] = atan2(mat(J,K), mat(K,K)); - Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm(); - if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) { - if(res[0] > Scalar(0)) { - res[0] -= Scalar(EIGEN_PI); - } - else { - res[0] += Scalar(EIGEN_PI); - } - res[1] = atan2(-mat(I,K), -c2); - } - else - res[1] = atan2(-mat(I,K), c2); - Scalar s1 = sin(res[0]); - Scalar c1 = cos(res[0]); - res[2] = atan2(s1*mat(K,I)-c1*mat(J,I), c1*mat(J,J) - s1 * mat(K,J)); - } - - template <typename Derived> - static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar,3,1>& res, const MatrixBase<Derived>& mat, internal::false_type /*isTaitBryan*/) - { - using std::atan2; - using std::sin; - using std::cos; - - typedef typename Derived::Scalar Scalar; - typedef Matrix<Scalar,2,1> Vector2; - - res[0] = atan2(mat(J,I), mat(K,I)); - if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) - { - if(res[0] > Scalar(0)) { - res[0] -= Scalar(EIGEN_PI); - } - else { - res[0] += Scalar(EIGEN_PI); - } - Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); - res[1] = -atan2(s2, mat(I,I)); - } - else - { - Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); - res[1] = atan2(s2, mat(I,I)); - } - - // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles, - // we can compute their respective rotation, and apply its inverse to M. Since the result must - // be a rotation around x, we have: - // - // c2 s1.s2 c1.s2 1 0 0 - // 0 c1 -s1 * M = 0 c3 s3 - // -s2 s1.c2 c1.c2 0 -s3 c3 - // - // Thus: m11.c1 - m21.s1 = c3 & m12.c1 - m22.s1 = s3 - - Scalar s1 = sin(res[0]); - Scalar c1 = cos(res[0]); - res[2] = atan2(c1*mat(J,K)-s1*mat(K,K), c1*mat(J,J) - s1 * mat(K,J)); - } - - template<typename Scalar> - static void CalcEulerAngles( - EulerAngles<Scalar, EulerSystem>& res, - const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat) - { - CalcEulerAngles(res, mat, false, false, false); - } - - template< - bool PositiveRangeAlpha, - bool PositiveRangeBeta, - bool PositiveRangeGamma, - typename Scalar> - static void CalcEulerAngles( - EulerAngles<Scalar, EulerSystem>& res, - const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat) - { - CalcEulerAngles(res, mat, PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma); - } - - template<typename Scalar> - static void CalcEulerAngles( - EulerAngles<Scalar, EulerSystem>& res, - const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat, - bool PositiveRangeAlpha, - bool PositiveRangeBeta, - bool PositiveRangeGamma) - { - CalcEulerAngles_imp( - res.angles(), mat, - typename internal::conditional<IsTaitBryan, internal::true_type, internal::false_type>::type()); - - if (IsAlphaOpposite == IsOdd) - res.alpha() = -res.alpha(); - - if (IsBetaOpposite == IsOdd) - res.beta() = -res.beta(); - - if (IsGammaOpposite == IsOdd) - res.gamma() = -res.gamma(); - - // Saturate results to the requested range - if (PositiveRangeAlpha && (res.alpha() < 0)) - res.alpha() += Scalar(2 * EIGEN_PI); - - if (PositiveRangeBeta && (res.beta() < 0)) - res.beta() += Scalar(2 * EIGEN_PI); - - if (PositiveRangeGamma && (res.gamma() < 0)) - res.gamma() += Scalar(2 * EIGEN_PI); - } - - template <typename _Scalar, class _System> 
- friend class Eigen::EulerAngles; - }; - -#define EIGEN_EULER_SYSTEM_TYPEDEF(A, B, C) \ - /** \ingroup EulerAngles_Module */ \ - typedef EulerSystem<EULER_##A, EULER_##B, EULER_##C> EulerSystem##A##B##C; - - EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,Z) - EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,X) - EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,Y) - EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,X) - - EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,X) - EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,Y) - EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Z) - EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Y) - - EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Y) - EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Z) - EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,X) - EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,Z) -} - -#endif // EIGEN_EULERSYSTEM_H diff --git a/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h deleted file mode 100644 index d49aa17..0000000 --- a/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ /dev/null @@ -1,261 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Mark Borgerding mark a borgerding net -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -namespace Eigen { - -namespace internal { - - // FFTW uses non-const arguments - // so we must use ugly const_cast calls for all the args it uses - // - // This should be safe as long as - // 1. we use FFTW_ESTIMATE for all our planning - // see the FFTW docs section 4.3.2 "Planner Flags" - // 2. fftw_complex is compatible with std::complex - // This assumes std::complex<T> layout is array of size 2 with real,imag - template <typename T> - inline - T * fftw_cast(const T* p) - { - return const_cast<T*>( p); - } - - inline - fftw_complex * fftw_cast( const std::complex<double> * p) - { - return const_cast<fftw_complex*>( reinterpret_cast<const fftw_complex*>(p) ); - } - - inline - fftwf_complex * fftw_cast( const std::complex<float> * p) - { - return const_cast<fftwf_complex*>( reinterpret_cast<const fftwf_complex*>(p) ); - } - - inline - fftwl_complex * fftw_cast( const std::complex<long double> * p) - { - return const_cast<fftwl_complex*>( reinterpret_cast<const fftwl_complex*>(p) ); - } - - template <typename T> - struct fftw_plan {}; - - template <> - struct fftw_plan<float> - { - typedef float scalar_type; - typedef fftwf_complex complex_type; - fftwf_plan m_plan; - fftw_plan() :m_plan(NULL) {} - ~fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} - - inline - void fwd(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft( m_plan, src,dst); - } - inline - void inv(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft( m_plan, src,dst); - } - inline - void fwd(complex_type * dst,scalar_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft_r2c( m_plan,src,dst); - } - inline - void inv(scalar_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) - m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft_c2r( m_plan, src,dst); - } - - inline - void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = 
fftwf_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft( m_plan, src,dst); - } - inline - void inv2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = fftwf_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwf_execute_dft( m_plan, src,dst); - } - - }; - template <> - struct fftw_plan<double> - { - typedef double scalar_type; - typedef fftw_complex complex_type; - ::fftw_plan m_plan; - fftw_plan() :m_plan(NULL) {} - ~fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} - - inline - void fwd(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft( m_plan, src,dst); - } - inline - void inv(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft( m_plan, src,dst); - } - inline - void fwd(complex_type * dst,scalar_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft_r2c( m_plan,src,dst); - } - inline - void inv(scalar_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) - m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft_c2r( m_plan, src,dst); - } - inline - void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = fftw_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft( m_plan, src,dst); - } - inline - void inv2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = fftw_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftw_execute_dft( m_plan, src,dst); - } - }; - template <> - struct fftw_plan<long double> - { - typedef long double scalar_type; - typedef fftwl_complex complex_type; - fftwl_plan m_plan; - fftw_plan() :m_plan(NULL) {} - ~fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} - - inline - void fwd(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft( m_plan, src,dst); - } - inline - void inv(complex_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft( m_plan, src,dst); - } - inline - void fwd(complex_type * dst,scalar_type * src,int nfft) { - if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft_r2c( m_plan,src,dst); - } - inline - void inv(scalar_type * dst,complex_type * src,int nfft) { - if (m_plan==NULL) - m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft_c2r( m_plan, src,dst); - } - inline - void fwd2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(n0,n1,src,dst,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft( m_plan, src,dst); - } - inline - void inv2( complex_type * dst,complex_type * src,int n0,int n1) { - if (m_plan==NULL) m_plan = fftwl_plan_dft_2d(n0,n1,src,dst,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_PRESERVE_INPUT); - fftwl_execute_dft( m_plan, src,dst); - } - }; - - template <typename _Scalar> - struct 
fftw_impl - { - typedef _Scalar Scalar; - typedef std::complex<Scalar> Complex; - - inline - void clear() - { - m_plans.clear(); - } - - // complex-to-complex forward FFT - inline - void fwd( Complex * dst,const Complex *src,int nfft) - { - get_plan(nfft,false,dst,src).fwd(fftw_cast(dst), fftw_cast(src),nfft ); - } - - // real-to-complex forward FFT - inline - void fwd( Complex * dst,const Scalar * src,int nfft) - { - get_plan(nfft,false,dst,src).fwd(fftw_cast(dst), fftw_cast(src) ,nfft); - } - - // 2-d complex-to-complex - inline - void fwd2(Complex * dst, const Complex * src, int n0,int n1) - { - get_plan(n0,n1,false,dst,src).fwd2(fftw_cast(dst), fftw_cast(src) ,n0,n1); - } - - // inverse complex-to-complex - inline - void inv(Complex * dst,const Complex *src,int nfft) - { - get_plan(nfft,true,dst,src).inv(fftw_cast(dst), fftw_cast(src),nfft ); - } - - // half-complex to scalar - inline - void inv( Scalar * dst,const Complex * src,int nfft) - { - get_plan(nfft,true,dst,src).inv(fftw_cast(dst), fftw_cast(src),nfft ); - } - - // 2-d complex-to-complex - inline - void inv2(Complex * dst, const Complex * src, int n0,int n1) - { - get_plan(n0,n1,true,dst,src).inv2(fftw_cast(dst), fftw_cast(src) ,n0,n1); - } - - - protected: - typedef fftw_plan<Scalar> PlanData; - - typedef std::map<int64_t,PlanData> PlanMap; - - PlanMap m_plans; - - inline - PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) - { - bool inplace = (dst==src); - bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0; - int64_t key = ( (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1; - return m_plans[key]; - } - - inline - PlanData & get_plan(int n0,int n1,bool inverse,void * dst,const void * src) - { - bool inplace = (dst==src); - bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0; - int64_t key = ( ( (((int64_t)n0) << 30)|(n1<<3 ) | (inverse<<2) | (inplace<<1) | aligned ) << 1 ) + 1; - return m_plans[key]; - } - }; - -} // end namespace internal - -} // end namespace Eigen - -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h deleted file mode 100644 index be51b4e..0000000 --- a/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ /dev/null @@ -1,420 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Mark Borgerding mark a borgerding net -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
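// --- Editorial note (added, not part of the deleted sources) ----------------
// ei_fftw_impl.h above and this kissfft-derived file below implement the same
// backend interface (fwd / inv / fwd2 / inv2) that the user-facing FFT class of
// the unsupported FFT module dispatches to.  A minimal usage sketch, under the
// assumption that <unsupported/Eigen/FFT>, its std::vector overloads and the
// EIGEN_FFTW_DEFAULT switch behave as in that module's documentation:
//
//   #include <unsupported/Eigen/FFT>
//   Eigen::FFT<float> fft;                      // kissfft backend by default,
//                                               // FFTW when EIGEN_FFTW_DEFAULT is defined
//   std::vector<float> timevec(256, 0.f);
//   std::vector<std::complex<float> > freqvec;
//   fft.fwd(freqvec, timevec);                  // real-to-complex forward transform
//   fft.inv(timevec, freqvec);                  // complex-to-real inverse transform
// -----------------------------------------------------------------------------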
- -namespace Eigen { - -namespace internal { - - // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft - // Copyright 2003-2009 Mark Borgerding - -template <typename _Scalar> -struct kiss_cpx_fft -{ - typedef _Scalar Scalar; - typedef std::complex<Scalar> Complex; - std::vector<Complex> m_twiddles; - std::vector<int> m_stageRadix; - std::vector<int> m_stageRemainder; - std::vector<Complex> m_scratchBuf; - bool m_inverse; - - inline - void make_twiddles(int nfft,bool inverse) - { - using std::acos; - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;i<nfft;++i) - m_twiddles[i] = exp( Complex(0,i*phinc) ); - } - - void factorize(int nfft) - { - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { - switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; - } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - if ( p > 5 ) - m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic - }while(n>1); - } - - template <typename _Src> - inline - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - inline - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;k<m;++k) { - Complex t = Fout[m+k] * m_twiddles[k*fstride]; - Fout[m+k] = Fout[k] - t; - Fout[k] += t; - } - } - - inline - void bfly4( Complex * Fout, const size_t fstride, const size_t m) - { - Complex scratch[6]; - int negative_if_inverse = m_inverse * -2 +1; - for (size_t k=0;k<m;++k) { - scratch[0] = Fout[k+m] * m_twiddles[k*fstride]; - scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2]; - scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3]; - scratch[5] = Fout[k] - scratch[1]; - - Fout[k] += scratch[1]; - scratch[3] = scratch[0] + scratch[2]; - scratch[4] = scratch[0] - scratch[2]; - scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse ); - - Fout[k+2*m] = Fout[k] - scratch[3]; - Fout[k] += scratch[3]; - Fout[k+m] = scratch[5] + scratch[4]; - Fout[k+3*m] = scratch[5] - scratch[4]; - } - } - - inline - void bfly3( Complex * Fout, const size_t fstride, const size_t m) - { - size_t k=m; - const size_t m2 = 2*m; - Complex *tw1,*tw2; - Complex scratch[5]; - Complex epi3; - epi3 = m_twiddles[fstride*m]; - - tw1=tw2=&m_twiddles[0]; - - do{ - scratch[1]=Fout[m] * *tw1; - scratch[2]=Fout[m2] * *tw2; - - scratch[3]=scratch[1]+scratch[2]; - scratch[0]=scratch[1]-scratch[2]; - tw1 += fstride; - tw2 += fstride*2; - Fout[m] = Complex( Fout->real() - 
Scalar(.5)*scratch[3].real() , Fout->imag() - Scalar(.5)*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - inline - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - scratch[1] = *Fout1 * tw[u*fstride]; - scratch[2] = *Fout2 * tw[2*u*fstride]; - scratch[3] = *Fout3 * tw[3*u*fstride]; - scratch[4] = *Fout4 * tw[4*u*fstride]; - - scratch[7] = scratch[1] + scratch[4]; - scratch[10] = scratch[1] - scratch[4]; - scratch[8] = scratch[2] + scratch[3]; - scratch[9] = scratch[2] - scratch[3]; - - *Fout0 += scratch[7]; - *Fout0 += scratch[8]; - - scratch[5] = scratch[0] + Complex( - (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ), - (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real()) - ); - - scratch[6] = Complex( - (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()), - -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag()) - ); - - *Fout1 = scratch[5] - scratch[6]; - *Fout4 = scratch[5] + scratch[6]; - - scratch[11] = scratch[0] + - Complex( - (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()), - (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real()) - ); - - scratch[12] = Complex( - -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()), - (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag()) - ); - - *Fout2=scratch[11]+scratch[12]; - *Fout3=scratch[11]-scratch[12]; - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } - - /* perform the butterfly for one stage of a mixed radix FFT */ - inline - void bfly_generic( - Complex * Fout, - const size_t fstride, - int m, - int p - ) - { - int u,k,q1,q; - Complex * twiddles = &m_twiddles[0]; - Complex t; - int Norig = static_cast<int>(m_twiddles.size()); - Complex * scratchbuf = &m_scratchBuf[0]; - - for ( u=0; u<m; ++u ) { - k=u; - for ( q1=0 ; q1<p ; ++q1 ) { - scratchbuf[q1] = Fout[ k ]; - k += m; - } - - k=u; - for ( q1=0 ; q1<p ; ++q1 ) { - int twidx=0; - Fout[ k ] = scratchbuf[0]; - for (q=1;q<p;++q ) { - twidx += static_cast<int>(fstride) * k; - if (twidx>=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } -}; - -template <typename _Scalar> -struct kissfft_impl -{ - typedef _Scalar Scalar; - typedef std::complex<Scalar> Complex; - - void clear() - { - m_plans.clear(); - m_realTwiddles.clear(); - } - - inline - void fwd( Complex * dst,const Complex *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } - - inline - void fwd2( Complex * dst,const Complex *src,int n0,int n1) - { - EIGEN_UNUSED_VARIABLE(dst); - EIGEN_UNUSED_VARIABLE(src); - EIGEN_UNUSED_VARIABLE(n0); - EIGEN_UNUSED_VARIABLE(n1); - } - - inline - void inv2( Complex * dst,const Complex *src,int n0,int n1) - { - EIGEN_UNUSED_VARIABLE(dst); - EIGEN_UNUSED_VARIABLE(src); - EIGEN_UNUSED_VARIABLE(n0); - EIGEN_UNUSED_VARIABLE(n1); - } - - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into 
the half-spectrum format - // then fill in the conjugate symmetric half - inline - void fwd( Complex * dst,const Scalar * src,int nfft) - { - if ( nfft&3 ) { - // use generic mode for odd - m_tmpBuf1.resize(nfft); - get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1); - std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst ); - }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); - - // use optimized mode for even real - fwd( dst, reinterpret_cast<const Complex*> (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - dst[0] = dc; - dst[ncfft] = nyquist; - } - } - - // inverse complex-to-complex - inline - void inv(Complex * dst,const Complex *src,int nfft) - { - get_plan(nfft,true).work(0, dst, src, 1,1); - } - - // half-complex to scalar - inline - void inv( Scalar * dst,const Complex * src,int nfft) - { - if (nfft&3) { - m_tmpBuf1.resize(nfft); - m_tmpBuf2.resize(nfft); - std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() ); - for (int k=1;k<(nfft>>1)+1;++k) - m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]); - inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft); - for (int k=0;k<nfft;++k) - dst[k] = m_tmpBuf2[k].real(); - }else{ - // optimized version for multiple of 4 - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); - m_tmpBuf1.resize(ncfft); - m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); - for (int k = 1; k <= ncfft / 2; ++k) { - Complex fk = src[k]; - Complex fnkc = conj(src[ncfft-k]); - Complex fek = fk + fnkc; - Complex tmp = fk - fnkc; - Complex fok = tmp * conj(rtw[k-1]); - m_tmpBuf1[k] = fek + fok; - m_tmpBuf1[ncfft-k] = conj(fek - fok); - } - get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf1[0], 1,1); - } - } - - protected: - typedef kiss_cpx_fft<Scalar> PlanData; - typedef std::map<int,PlanData> PlanMap; - - PlanMap m_plans; - std::map<int, std::vector<Complex> > m_realTwiddles; - std::vector<Complex> m_tmpBuf1; - std::vector<Complex> m_tmpBuf2; - - inline - int PlanKey(int nfft, bool isinverse) const { return (nfft<<1) | int(isinverse); } - - inline - PlanData & get_plan(int nfft, bool inverse) - { - // TODO look for PlanKey(nfft, ! 
inverse) and conjugate the twiddles - PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; - if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); - } - return pd; - } - - inline - Complex * real_twiddles(int ncfft2) - { - using std::acos; - std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there - if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * (Scalar(k) / ncfft + Scalar(.5)) ) ); - } - return &twidref[0]; - } -}; - -} // end namespace internal - -} // end namespace Eigen - -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h deleted file mode 100644 index dc0093e..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h +++ /dev/null @@ -1,189 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> - -/* NOTE The functions of this file have been adapted from the GMM++ library */ - -//======================================================================== -// -// Copyright (C) 2002-2007 Yves Renard -// -// This file is a part of GETFEM++ -// -// Getfem++ is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; version 2.1 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// You should have received a copy of the GNU Lesser General Public -// License along with this program; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -// USA. -// -//======================================================================== - -#include "../../../../Eigen/src/Core/util/NonMPL2.h" - -#ifndef EIGEN_CONSTRAINEDCG_H -#define EIGEN_CONSTRAINEDCG_H - -#include <Eigen/Core> - -namespace Eigen { - -namespace internal { - -/** \ingroup IterativeSolvers_Module - * Compute the pseudo inverse of the non-square matrix C such that - * \f$ CINV = (C * C^T)^{-1} * C \f$ based on a conjugate gradient method. - * - * This function is internally used by constrained_cg. - */ -template <typename CMatrix, typename CINVMatrix> -void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV) -{ - // optimisable : copie de la ligne, precalcul de C * trans(C). - typedef typename CMatrix::Scalar Scalar; - typedef typename CMatrix::Index Index; - // FIXME use sparse vectors ? 
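  // Editorial comment (added for clarity, not in the deleted file): the loop below
  // forms CINV = (C * C^T)^{-1} * C one row at a time.  For each row i it solves
  // (C * C^T) * e = d_i with an unpreconditioned conjugate gradient, where d_i is
  // the i-th canonical basis vector, and then stores l = C^T * e as row i of the
  // sparse result CINV.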
- typedef Matrix<Scalar,Dynamic,1> TmpVec; - - Index rows = C.rows(), cols = C.cols(); - - TmpVec d(rows), e(rows), l(cols), p(rows), q(rows), r(rows); - Scalar rho, rho_1, alpha; - d.setZero(); - - typedef Triplet<double> T; - std::vector<T> tripletList; - - for (Index i = 0; i < rows; ++i) - { - d[i] = 1.0; - rho = 1.0; - e.setZero(); - r = d; - p = d; - - while (rho >= 1e-38) - { /* conjugate gradient to compute e */ - /* which is the i-th row of inv(C * trans(C)) */ - l = C.transpose() * p; - q = C * l; - alpha = rho / p.dot(q); - e += alpha * p; - r += -alpha * q; - rho_1 = rho; - rho = r.dot(r); - p = (rho/rho_1) * p + r; - } - - l = C.transpose() * e; // l is the i-th row of CINV - // FIXME add a generic "prune/filter" expression for both dense and sparse object to sparse - for (Index j=0; j<l.size(); ++j) - if (l[j]<1e-15) - tripletList.push_back(T(i,j,l(j))); - - - d[i] = 0.0; - } - CINV.setFromTriplets(tripletList.begin(), tripletList.end()); -} - - - -/** \ingroup IterativeSolvers_Module - * Constrained conjugate gradient - * - * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the contraint \f$ Cx \le f \f$ - */ -template<typename TMatrix, typename CMatrix, - typename VectorX, typename VectorB, typename VectorF> -void constrained_cg(const TMatrix& A, const CMatrix& C, VectorX& x, - const VectorB& b, const VectorF& f, IterationController &iter) -{ - using std::sqrt; - typedef typename TMatrix::Scalar Scalar; - typedef typename TMatrix::Index Index; - typedef Matrix<Scalar,Dynamic,1> TmpVec; - - Scalar rho = 1.0, rho_1, lambda, gamma; - Index xSize = x.size(); - TmpVec p(xSize), q(xSize), q2(xSize), - r(xSize), old_z(xSize), z(xSize), - memox(xSize); - std::vector<bool> satured(C.rows()); - p.setZero(); - iter.setRhsNorm(sqrt(b.dot(b))); // gael vect_sp(PS, b, b) - if (iter.rhsNorm() == 0.0) iter.setRhsNorm(1.0); - - SparseMatrix<Scalar,RowMajor> CINV(C.rows(), C.cols()); - pseudo_inverse(C, CINV); - - while(true) - { - // computation of residual - old_z = z; - memox = x; - r = b; - r += A * -x; - z = r; - bool transition = false; - for (Index i = 0; i < C.rows(); ++i) - { - Scalar al = C.row(i).dot(x) - f.coeff(i); - if (al >= -1.0E-15) - { - if (!satured[i]) - { - satured[i] = true; - transition = true; - } - Scalar bb = CINV.row(i).dot(z); - if (bb > 0.0) - // FIXME: we should allow that: z += -bb * C.row(i); - for (typename CMatrix::InnerIterator it(C,i); it; ++it) - z.coeffRef(it.index()) -= bb*it.value(); - } - else - satured[i] = false; - } - - // descent direction - rho_1 = rho; - rho = r.dot(z); - - if (iter.finished(rho)) break; - - if (iter.noiseLevel() > 0 && transition) std::cerr << "CCG: transition\n"; - if (transition || iter.first()) gamma = 0.0; - else gamma = (std::max)(0.0, (rho - old_z.dot(z)) / rho_1); - p = z + gamma*p; - - ++iter; - // one dimensionnal optimization - q = A * p; - lambda = rho / q.dot(p); - for (Index i = 0; i < C.rows(); ++i) - { - if (!satured[i]) - { - Scalar bb = C.row(i).dot(p) - f[i]; - if (bb > 0.0) - lambda = (std::min)(lambda, (f.coeff(i)-C.row(i).dot(x)) / bb); - } - } - x += lambda * p; - memox -= x; - } -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CONSTRAINEDCG_H diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h deleted file mode 100644 index 4079e23..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h +++ /dev/null @@ -1,510 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library 
-// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_DGMRES_H -#define EIGEN_DGMRES_H - -#include <Eigen/Eigenvalues> - -namespace Eigen { - -template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> > -class DGMRES; - -namespace internal { - -template< typename _MatrixType, typename _Preconditioner> -struct traits<DGMRES<_MatrixType,_Preconditioner> > -{ - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; -}; - -/** \brief Computes a permutation vector to have a sorted sequence - * \param vec The vector to reorder. - * \param perm gives the sorted sequence on output. Must be initialized with 0..n-1 - * \param ncut Put the ncut smallest elements at the end of the vector - * WARNING This is an expensive sort, so should be used only - * for small size vectors - * TODO Use modified QuickSplit or std::nth_element to get the smallest values - */ -template <typename VectorType, typename IndexType> -void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut) -{ - eigen_assert(vec.size() == perm.size()); - bool flag; - for (Index k = 0; k < ncut; k++) - { - flag = false; - for (Index j = 0; j < vec.size()-1; j++) - { - if ( vec(perm(j)) < vec(perm(j+1)) ) - { - std::swap(perm(j),perm(j+1)); - flag = true; - } - if (!flag) break; // The vector is in sorted order - } - } -} - -} -/** - * \ingroup IterativeLInearSolvers_Module - * \brief A Restarted GMRES with deflation. - * This class implements a modification of the GMRES solver for - * sparse linear systems. The basis is built with modified - * Gram-Schmidt. At each restart, a few approximated eigenvectors - * corresponding to the smallest eigenvalues are used to build a - * preconditioner for the next cycle. This preconditioner - * for deflation can be combined with any other preconditioner, - * the IncompleteLUT for instance. The preconditioner is applied - * at right of the matrix and the combination is multiplicative. - * - * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner - * Typical usage : - * \code - * SparseMatrix<double> A; - * VectorXd x, b; - * //Fill A and b ... - * DGMRES<SparseMatrix<double> > solver; - * solver.set_restart(30); // Set restarting value - * solver.setEigenv(1); // Set the number of eigenvalues to deflate - * solver.compute(A); - * x = solver.solve(b); - * \endcode - * - * DGMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. - * - * References : - * [1] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid - * Algebraic Solvers for Linear Systems Arising from Compressible - * Flows, Computers and Fluids, In Press, - * http://dx.doi.org/10.1016/j.compfluid.2012.03.023 - * [2] K. Burrage and J. Erhel, On the performance of various - * adaptive preconditioned GMRES strategies, 5(1998), 101-121. - * [3] J. Erhel, K. Burrage and B. Pohl, Restarted GMRES - * preconditioned by deflation,J. Computational and Applied - * Mathematics, 69(1996), 303-318. 
- - * - */ -template< typename _MatrixType, typename _Preconditioner> -class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> > -{ - typedef IterativeSolverBase<DGMRES> Base; - using Base::matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; - using Base::m_tolerance; - public: - using Base::_solve_impl; - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::StorageIndex StorageIndex; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix; - typedef Matrix<RealScalar,Dynamic,Dynamic> DenseRealMatrix; - typedef Matrix<Scalar,Dynamic,1> DenseVector; - typedef Matrix<RealScalar,Dynamic,1> DenseRealVector; - typedef Matrix<std::complex<RealScalar>, Dynamic, 1> ComplexVector; - - - /** Default constructor. */ - DGMRES() : Base(),m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template<typename MatrixDerived> - explicit DGMRES(const EigenBase<MatrixDerived>& A) : Base(A.derived()), m_restart(30),m_neig(0),m_r(0),m_maxNeig(5),m_isDeflAllocated(false),m_isDeflInitialized(false) {} - - ~DGMRES() {} - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_with_guess_impl(const Rhs& b, Dest& x) const - { - bool failed = false; - for(Index j=0; j<b.cols(); ++j) - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - typename Dest::ColXpr xj(x,j); - dgmres(matrix(), b.col(j), xj, Base::m_preconditioner); - } - m_info = failed ? NumericalIssue - : m_error <= Base::m_tolerance ? 
Success - : NoConvergence; - m_isInitialized = true; - } - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_impl(const Rhs& b, MatrixBase<Dest>& x) const - { - x = b; - _solve_with_guess_impl(b,x.derived()); - } - /** - * Get the restart value - */ - Index restart() { return m_restart; } - - /** - * Set the restart value (default is 30) - */ - void set_restart(const Index restart) { m_restart=restart; } - - /** - * Set the number of eigenvalues to deflate at each restart - */ - void setEigenv(const Index neig) - { - m_neig = neig; - if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates - } - - /** - * Get the size of the deflation subspace size - */ - Index deflSize() {return m_r; } - - /** - * Set the maximum size of the deflation subspace - */ - void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; } - - protected: - // DGMRES algorithm - template<typename Rhs, typename Dest> - void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const; - // Perform one cycle of GMRES - template<typename Dest> - Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const; - // Compute data to use for deflation - Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const; - // Apply deflation to a vector - template<typename RhsType, typename DestType> - Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const; - ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const; - ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const; - // Init data for deflation - void dgmresInitDeflation(Index& rows) const; - mutable DenseMatrix m_V; // Krylov basis vectors - mutable DenseMatrix m_H; // Hessenberg matrix - mutable DenseMatrix m_Hes; // Initial hessenberg matrix wihout Givens rotations applied - mutable Index m_restart; // Maximum size of the Krylov subspace - mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace - mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) - mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */ - mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T - mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart - mutable Index m_r; // Current number of deflated eigenvalues, size of m_U - mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate - mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A - mutable bool m_isDeflAllocated; - mutable bool m_isDeflInitialized; - - //Adaptive strategy - mutable RealScalar m_smv; // Smaller multiple of the remaining number of steps allowed - mutable bool m_force; // Force the use of deflation at each restart - -}; -/** - * \brief Perform several cycles of restarted GMRES with modified Gram Schmidt, - * - * A right preconditioner is used combined with deflation. 
- * - */ -template< typename _MatrixType, typename _Preconditioner> -template<typename Rhs, typename Dest> -void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, - const Preconditioner& precond) const -{ - //Initialization - Index n = mat.rows(); - DenseVector r0(n); - Index nbIts = 0; - m_H.resize(m_restart+1, m_restart); - m_Hes.resize(m_restart, m_restart); - m_V.resize(n,m_restart+1); - //Initial residual vector and intial norm - x = precond.solve(x); - r0 = rhs - mat * x; - RealScalar beta = r0.norm(); - RealScalar normRhs = rhs.norm(); - m_error = beta/normRhs; - if(m_error < m_tolerance) - m_info = Success; - else - m_info = NoConvergence; - - // Iterative process - while (nbIts < m_iterations && m_info == NoConvergence) - { - dgmresCycle(mat, precond, x, r0, beta, normRhs, nbIts); - - // Compute the new residual vector for the restart - if (nbIts < m_iterations && m_info == NoConvergence) - r0 = rhs - mat * x; - } -} - -/** - * \brief Perform one restart cycle of DGMRES - * \param mat The coefficient matrix - * \param precond The preconditioner - * \param x the new approximated solution - * \param r0 The initial residual vector - * \param beta The norm of the residual computed so far - * \param normRhs The norm of the right hand side vector - * \param nbIts The number of iterations - */ -template< typename _MatrixType, typename _Preconditioner> -template<typename Dest> -Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const -{ - //Initialization - DenseVector g(m_restart+1); // Right hand side of the least square problem - g.setZero(); - g(0) = Scalar(beta); - m_V.col(0) = r0/beta; - m_info = NoConvergence; - std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations - Index it = 0; // Number of inner iterations - Index n = mat.rows(); - DenseVector tv1(n), tv2(n); //Temporary vectors - while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations) - { - // Apply preconditioner(s) at right - if (m_isDeflInitialized ) - { - dgmresApplyDeflation(m_V.col(it), tv1); // Deflation - tv2 = precond.solve(tv1); - } - else - { - tv2 = precond.solve(m_V.col(it)); // User's selected preconditioner - } - tv1 = mat * tv2; - - // Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt - Scalar coef; - for (Index i = 0; i <= it; ++i) - { - coef = tv1.dot(m_V.col(i)); - tv1 = tv1 - coef * m_V.col(i); - m_H(i,it) = coef; - m_Hes(i,it) = coef; - } - // Normalize the vector - coef = tv1.norm(); - m_V.col(it+1) = tv1/coef; - m_H(it+1, it) = coef; -// m_Hes(it+1,it) = coef; - - // FIXME Check for happy breakdown - - // Update Hessenberg matrix with Givens rotations - for (Index i = 1; i <= it; ++i) - { - m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint()); - } - // Compute the new plane rotation - gr[it].makeGivens(m_H(it, it), m_H(it+1,it)); - // Apply the new rotation - m_H.col(it).applyOnTheLeft(it,it+1,gr[it].adjoint()); - g.applyOnTheLeft(it,it+1, gr[it].adjoint()); - - beta = std::abs(g(it+1)); - m_error = beta/normRhs; - // std::cerr << nbIts << " Relative Residual Norm " << m_error << std::endl; - it++; nbIts++; - - if (m_error < m_tolerance) - { - // The method has converged - m_info = Success; - break; - } - } - - // Compute the new coefficients by solving the least square problem -// it++; - //FIXME Check first if the matrix is singular ... 
zero diagonal - DenseVector nrs(m_restart); - nrs = m_H.topLeftCorner(it,it).template triangularView<Upper>().solve(g.head(it)); - - // Form the new solution - if (m_isDeflInitialized) - { - tv1 = m_V.leftCols(it) * nrs; - dgmresApplyDeflation(tv1, tv2); - x = x + precond.solve(tv2); - } - else - x = x + precond.solve(m_V.leftCols(it) * nrs); - - // Go for a new cycle and compute data for deflation - if(nbIts < m_iterations && m_info == NoConvergence && m_neig > 0 && (m_r+m_neig) < m_maxNeig) - dgmresComputeDeflationData(mat, precond, it, m_neig); - return 0; - -} - - -template< typename _MatrixType, typename _Preconditioner> -void DGMRES<_MatrixType, _Preconditioner>::dgmresInitDeflation(Index& rows) const -{ - m_U.resize(rows, m_maxNeig); - m_MU.resize(rows, m_maxNeig); - m_T.resize(m_maxNeig, m_maxNeig); - m_lambdaN = 0.0; - m_isDeflAllocated = true; -} - -template< typename _MatrixType, typename _Preconditioner> -inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const ComplexSchur<DenseMatrix>& schurofH) const -{ - return schurofH.matrixT().diagonal(); -} - -template< typename _MatrixType, typename _Preconditioner> -inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const RealSchur<DenseMatrix>& schurofH) const -{ - const DenseMatrix& T = schurofH.matrixT(); - Index it = T.rows(); - ComplexVector eig(it); - Index j = 0; - while (j < it-1) - { - if (T(j+1,j) ==Scalar(0)) - { - eig(j) = std::complex<RealScalar>(T(j,j),RealScalar(0)); - j++; - } - else - { - eig(j) = std::complex<RealScalar>(T(j,j),T(j+1,j)); - eig(j+1) = std::complex<RealScalar>(T(j,j+1),T(j+1,j+1)); - j++; - } - } - if (j < it-1) eig(j) = std::complex<RealScalar>(T(j,j),RealScalar(0)); - return eig; -} - -template< typename _MatrixType, typename _Preconditioner> -Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const -{ - // First, find the Schur form of the Hessenberg matrix H - typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH; - bool computeU = true; - DenseMatrix matrixQ(it,it); - matrixQ.setIdentity(); - schurofH.computeFromHessenberg(m_Hes.topLeftCorner(it,it), matrixQ, computeU); - - ComplexVector eig(it); - Matrix<StorageIndex,Dynamic,1>perm(it); - eig = this->schurValues(schurofH); - - // Reorder the absolute values of Schur values - DenseRealVector modulEig(it); - for (Index j=0; j<it; ++j) modulEig(j) = std::abs(eig(j)); - perm.setLinSpaced(it,0,internal::convert_index<StorageIndex>(it-1)); - internal::sortWithPermutation(modulEig, perm, neig); - - if (!m_lambdaN) - { - m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN); - } - //Count the real number of extracted eigenvalues (with complex conjugates) - Index nbrEig = 0; - while (nbrEig < neig) - { - if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++; - else nbrEig += 2; - } - // Extract the Schur vectors corresponding to the smallest Ritz values - DenseMatrix Sr(it, nbrEig); - Sr.setZero(); - for (Index j = 0; j < nbrEig; j++) - { - Sr.col(j) = schurofH.matrixU().col(perm(it-j-1)); - } - - // Form the Schur vectors of the initial matrix using the Krylov basis - DenseMatrix X; - X = m_V.leftCols(it) * Sr; - if (m_r) - { - // Orthogonalize X against m_U using modified Gram-Schmidt - for (Index j = 0; j < nbrEig; j++) - for 
(Index k =0; k < m_r; k++) - X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k); - } - - // Compute m_MX = A * M^-1 * X - Index m = m_V.rows(); - if (!m_isDeflAllocated) - dgmresInitDeflation(m); - DenseMatrix MX(m, nbrEig); - DenseVector tv1(m); - for (Index j = 0; j < nbrEig; j++) - { - tv1 = mat * X.col(j); - MX.col(j) = precond.solve(tv1); - } - - //Update m_T = [U'MU U'MX; X'MU X'MX] - m_T.block(m_r, m_r, nbrEig, nbrEig) = X.transpose() * MX; - if(m_r) - { - m_T.block(0, m_r, m_r, nbrEig) = m_U.leftCols(m_r).transpose() * MX; - m_T.block(m_r, 0, nbrEig, m_r) = X.transpose() * m_MU.leftCols(m_r); - } - - // Save X into m_U and m_MX in m_MU - for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j); - for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j); - // Increase the size of the invariant subspace - m_r += nbrEig; - - // Factorize m_T into m_luT - m_luT.compute(m_T.topLeftCorner(m_r, m_r)); - - //FIXME CHeck if the factorization was correctly done (nonsingular matrix) - m_isDeflInitialized = true; - return 0; -} -template<typename _MatrixType, typename _Preconditioner> -template<typename RhsType, typename DestType> -Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const -{ - DenseVector x1 = m_U.leftCols(m_r).transpose() * x; - y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1); - return 0; -} - -} // end namespace Eigen -#endif diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h deleted file mode 100644 index 92618b1..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h +++ /dev/null @@ -1,343 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> -// Copyright (C) 2012, 2014 Kolja Brix <brix@igpm.rwth-aaachen.de> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_GMRES_H -#define EIGEN_GMRES_H - -namespace Eigen { - -namespace internal { - -/** -* Generalized Minimal Residual Algorithm based on the -* Arnoldi algorithm implemented with Householder reflections. -* -* Parameters: -* \param mat matrix of linear system of equations -* \param rhs right hand side vector of linear system of equations -* \param x on input: initial guess, on output: solution -* \param precond preconditioner used -* \param iters on input: maximum number of iterations to perform -* on output: number of iterations performed -* \param restart number of iterations for a restart -* \param tol_error on input: relative residual tolerance -* on output: residuum achieved -* -* \sa IterativeMethods::bicgstab() -* -* -* For references, please see: -* -* Saad, Y. and Schultz, M. H. -* GMRES: A Generalized Minimal Residual Algorithm for Solving Nonsymmetric Linear Systems. -* SIAM J.Sci.Stat.Comp. 7, 1986, pp. 856 - 869. -* -* Saad, Y. -* Iterative Methods for Sparse Linear Systems. -* Society for Industrial and Applied Mathematics, Philadelphia, 2003. -* -* Walker, H. F. -* Implementations of the GMRES method. -* Comput.Phys.Comm. 53, 1989, pp. 311 - 320. -* -* Walker, H. F. -* Implementation of the GMRES Method using Householder Transformations. -* SIAM J.Sci.Stat.Comp. 9, 1988, pp. 152 - 163. 
-* -*/ -template<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner> -bool gmres(const MatrixType & mat, const Rhs & rhs, Dest & x, const Preconditioner & precond, - Index &iters, const Index &restart, typename Dest::RealScalar & tol_error) { - - using std::sqrt; - using std::abs; - - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - typedef Matrix < Scalar, Dynamic, 1 > VectorType; - typedef Matrix < Scalar, Dynamic, Dynamic, ColMajor> FMatrixType; - - RealScalar tol = tol_error; - const Index maxIters = iters; - iters = 0; - - const Index m = mat.rows(); - - // residual and preconditioned residual - VectorType p0 = rhs - mat*x; - VectorType r0 = precond.solve(p0); - - const RealScalar r0Norm = r0.norm(); - - // is initial guess already good enough? - if(r0Norm == 0) - { - tol_error = 0; - return true; - } - - // storage for Hessenberg matrix and Householder data - FMatrixType H = FMatrixType::Zero(m, restart + 1); - VectorType w = VectorType::Zero(restart + 1); - VectorType tau = VectorType::Zero(restart + 1); - - // storage for Jacobi rotations - std::vector < JacobiRotation < Scalar > > G(restart); - - // storage for temporaries - VectorType t(m), v(m), workspace(m), x_new(m); - - // generate first Householder vector - Ref<VectorType> H0_tail = H.col(0).tail(m - 1); - RealScalar beta; - r0.makeHouseholder(H0_tail, tau.coeffRef(0), beta); - w(0) = Scalar(beta); - - for (Index k = 1; k <= restart; ++k) - { - ++iters; - - v = VectorType::Unit(m, k - 1); - - // apply Householder reflections H_{1} ... H_{k-1} to v - // TODO: use a HouseholderSequence - for (Index i = k - 1; i >= 0; --i) { - v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); - } - - // apply matrix M to v: v = mat * v; - t.noalias() = mat * v; - v = precond.solve(t); - - // apply Householder reflections H_{k-1} ... 
H_{1} to v - // TODO: use a HouseholderSequence - for (Index i = 0; i < k; ++i) { - v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); - } - - if (v.tail(m - k).norm() != 0.0) - { - if (k <= restart) - { - // generate new Householder vector - Ref<VectorType> Hk_tail = H.col(k).tail(m - k - 1); - v.tail(m - k).makeHouseholder(Hk_tail, tau.coeffRef(k), beta); - - // apply Householder reflection H_{k} to v - v.tail(m - k).applyHouseholderOnTheLeft(Hk_tail, tau.coeffRef(k), workspace.data()); - } - } - - if (k > 1) - { - for (Index i = 0; i < k - 1; ++i) - { - // apply old Givens rotations to v - v.applyOnTheLeft(i, i + 1, G[i].adjoint()); - } - } - - if (k<m && v(k) != (Scalar) 0) - { - // determine next Givens rotation - G[k - 1].makeGivens(v(k - 1), v(k)); - - // apply Givens rotation to v and w - v.applyOnTheLeft(k - 1, k, G[k - 1].adjoint()); - w.applyOnTheLeft(k - 1, k, G[k - 1].adjoint()); - } - - // insert coefficients into upper matrix triangle - H.col(k-1).head(k) = v.head(k); - - tol_error = abs(w(k)) / r0Norm; - bool stop = (k==m || tol_error < tol || iters == maxIters); - - if (stop || k == restart) - { - // solve upper triangular system - Ref<VectorType> y = w.head(k); - H.topLeftCorner(k, k).template triangularView <Upper>().solveInPlace(y); - - // use Horner-like scheme to calculate solution vector - x_new.setZero(); - for (Index i = k - 1; i >= 0; --i) - { - x_new(i) += y(i); - // apply Householder reflection H_{i} to x_new - x_new.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data()); - } - - x += x_new; - - if(stop) - { - return true; - } - else - { - k=0; - - // reset data for restart - p0.noalias() = rhs - mat*x; - r0 = precond.solve(p0); - - // clear Hessenberg matrix and Householder data - H.setZero(); - w.setZero(); - tau.setZero(); - - // generate first Householder vector - r0.makeHouseholder(H0_tail, tau.coeffRef(0), beta); - w(0) = Scalar(beta); - } - } - } - - return false; - -} - -} - -template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> > -class GMRES; - -namespace internal { - -template< typename _MatrixType, typename _Preconditioner> -struct traits<GMRES<_MatrixType,_Preconditioner> > -{ - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; -}; - -} - -/** \ingroup IterativeLinearSolvers_Module - * \brief A GMRES solver for sparse square problems - * - * This class allows to solve for A.x = b sparse linear problems using a generalized minimal - * residual method. The vectors x and b can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner - * - * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() - * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations - * and NumTraits<Scalar>::epsilon() for the tolerance. - * - * This class can be used as the direct solver classes. 
Here is a typical usage example: - * \code - * int n = 10000; - * VectorXd x(n), b(n); - * SparseMatrix<double> A(n,n); - * // fill A and b - * GMRES<SparseMatrix<double> > solver(A); - * x = solver.solve(b); - * std::cout << "#iterations: " << solver.iterations() << std::endl; - * std::cout << "estimated error: " << solver.error() << std::endl; - * // update b, and solve again - * x = solver.solve(b); - * \endcode - * - * By default the iterations start with x=0 as an initial guess of the solution. - * One can control the start using the solveWithGuess() method. - * - * GMRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. - * - * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner - */ -template< typename _MatrixType, typename _Preconditioner> -class GMRES : public IterativeSolverBase<GMRES<_MatrixType,_Preconditioner> > -{ - typedef IterativeSolverBase<GMRES> Base; - using Base::matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; - -private: - Index m_restart; - -public: - using Base::_solve_impl; - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - -public: - - /** Default constructor. */ - GMRES() : Base(), m_restart(30) {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template<typename MatrixDerived> - explicit GMRES(const EigenBase<MatrixDerived>& A) : Base(A.derived()), m_restart(30) {} - - ~GMRES() {} - - /** Get the number of iterations after that a restart is performed. - */ - Index get_restart() { return m_restart; } - - /** Set the number of iterations after that a restart is performed. - * \param restart number of iterations for a restarti, default is 30. - */ - void set_restart(const Index restart) { m_restart=restart; } - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_with_guess_impl(const Rhs& b, Dest& x) const - { - bool failed = false; - for(Index j=0; j<b.cols(); ++j) - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - typename Dest::ColXpr xj(x,j); - if(!internal::gmres(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error)) - failed = true; - } - m_info = failed ? NumericalIssue - : m_error <= Base::m_tolerance ? 
Success - : NoConvergence; - m_isInitialized = true; - } - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const - { - x = b; - if(x.squaredNorm() == 0) return; // Check Zero right hand side - _solve_with_guess_impl(b,x.derived()); - } - -protected: - -}; - -} // end namespace Eigen - -#endif // EIGEN_GMRES_H diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h b/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h deleted file mode 100644 index 7d08c35..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h +++ /dev/null @@ -1,90 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INCOMPLETE_LU_H -#define EIGEN_INCOMPLETE_LU_H - -namespace Eigen { - -template <typename _Scalar> -class IncompleteLU : public SparseSolverBase<IncompleteLU<_Scalar> > -{ - protected: - typedef SparseSolverBase<IncompleteLU<_Scalar> > Base; - using Base::m_isInitialized; - - typedef _Scalar Scalar; - typedef Matrix<Scalar,Dynamic,1> Vector; - typedef typename Vector::Index Index; - typedef SparseMatrix<Scalar,RowMajor> FactorType; - - public: - typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType; - - IncompleteLU() {} - - template<typename MatrixType> - IncompleteLU(const MatrixType& mat) - { - compute(mat); - } - - Index rows() const { return m_lu.rows(); } - Index cols() const { return m_lu.cols(); } - - template<typename MatrixType> - IncompleteLU& compute(const MatrixType& mat) - { - m_lu = mat; - int size = mat.cols(); - Vector diag(size); - for(int i=0; i<size; ++i) - { - typename FactorType::InnerIterator k_it(m_lu,i); - for(; k_it && k_it.index()<i; ++k_it) - { - int k = k_it.index(); - k_it.valueRef() /= diag(k); - - typename FactorType::InnerIterator j_it(k_it); - typename FactorType::InnerIterator kj_it(m_lu, k); - while(kj_it && kj_it.index()<=k) ++kj_it; - for(++j_it; j_it; ) - { - if(kj_it.index()==j_it.index()) - { - j_it.valueRef() -= k_it.value() * kj_it.value(); - ++j_it; - ++kj_it; - } - else if(kj_it.index()<j_it.index()) ++kj_it; - else ++j_it; - } - } - if(k_it && k_it.index()==i) diag(i) = k_it.value(); - else diag(i) = 1; - } - m_isInitialized = true; - return *this; - } - - template<typename Rhs, typename Dest> - void _solve_impl(const Rhs& b, Dest& x) const - { - x = m_lu.template triangularView<UnitLower>().solve(b); - x = m_lu.template triangularView<Upper>().solve(x); - } - - protected: - FactorType m_lu; -}; - -} // end namespace Eigen - -#endif // EIGEN_INCOMPLETE_LU_H diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h b/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h deleted file mode 100644 index c9c1a4b..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h +++ /dev/null @@ -1,154 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> - -/* NOTE The class IterationController has been adapted from the iteration - * class of the GMM++ and ITL libraries. 
- */ - -//======================================================================= -// Copyright (C) 1997-2001 -// Authors: Andrew Lumsdaine <lums@osl.iu.edu> -// Lie-Quan Lee <llee@osl.iu.edu> -// -// This file is part of the Iterative Template Library -// -// You should have received a copy of the License Agreement for the -// Iterative Template Library along with the software; see the -// file LICENSE. -// -// Permission to modify the code and to distribute modified code is -// granted, provided the text of this NOTICE is retained, a notice that -// the code was modified is included with the above COPYRIGHT NOTICE and -// with the COPYRIGHT NOTICE in the LICENSE file, and that the LICENSE -// file is distributed with the modified code. -// -// LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. -// By way of example, but not limitation, Licensor MAKES NO -// REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY -// PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE COMPONENTS -// OR DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS -// OR OTHER RIGHTS. -//======================================================================= - -//======================================================================== -// -// Copyright (C) 2002-2007 Yves Renard -// -// This file is a part of GETFEM++ -// -// Getfem++ is free software; you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as -// published by the Free Software Foundation; version 2.1 of the License. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// You should have received a copy of the GNU Lesser General Public -// License along with this program; if not, write to the Free Software -// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -// USA. -// -//======================================================================== - -#include "../../../../Eigen/src/Core/util/NonMPL2.h" - -#ifndef EIGEN_ITERATION_CONTROLLER_H -#define EIGEN_ITERATION_CONTROLLER_H - -namespace Eigen { - -/** \ingroup IterativeSolvers_Module - * \class IterationController - * - * \brief Controls the iterations of the iterative solvers - * - * This class has been adapted from the iteration class of GMM++ and ITL libraries. - * - */ -class IterationController -{ - protected : - double m_rhsn; ///< Right hand side norm - size_t m_maxiter; ///< Max. 
number of iterations - int m_noise; ///< if noise > 0 iterations are printed - double m_resmax; ///< maximum residual - double m_resminreach, m_resadd; - size_t m_nit; ///< iteration number - double m_res; ///< last computed residual - bool m_written; - void (*m_callback)(const IterationController&); - public : - - void init() - { - m_nit = 0; m_res = 0.0; m_written = false; - m_resminreach = 1E50; m_resadd = 0.0; - m_callback = 0; - } - - IterationController(double r = 1.0E-8, int noi = 0, size_t mit = size_t(-1)) - : m_rhsn(1.0), m_maxiter(mit), m_noise(noi), m_resmax(r) { init(); } - - void operator ++(int) { m_nit++; m_written = false; m_resadd += m_res; } - void operator ++() { (*this)++; } - - bool first() { return m_nit == 0; } - - /* get/set the "noisyness" (verbosity) of the solvers */ - int noiseLevel() const { return m_noise; } - void setNoiseLevel(int n) { m_noise = n; } - void reduceNoiseLevel() { if (m_noise > 0) m_noise--; } - - double maxResidual() const { return m_resmax; } - void setMaxResidual(double r) { m_resmax = r; } - - double residual() const { return m_res; } - - /* change the user-definable callback, called after each iteration */ - void setCallback(void (*t)(const IterationController&)) - { - m_callback = t; - } - - size_t iteration() const { return m_nit; } - void setIteration(size_t i) { m_nit = i; } - - size_t maxIterarions() const { return m_maxiter; } - void setMaxIterations(size_t i) { m_maxiter = i; } - - double rhsNorm() const { return m_rhsn; } - void setRhsNorm(double r) { m_rhsn = r; } - - bool converged() const { return m_res <= m_rhsn * m_resmax; } - bool converged(double nr) - { - using std::abs; - m_res = abs(nr); - m_resminreach = (std::min)(m_resminreach, m_res); - return converged(); - } - template<typename VectorType> bool converged(const VectorType &v) - { return converged(v.squaredNorm()); } - - bool finished(double nr) - { - if (m_callback) m_callback(*this); - if (m_noise > 0 && !m_written) - { - converged(nr); - m_written = true; - } - return (m_nit >= m_maxiter || converged(nr)); - } - template <typename VectorType> - bool finished(const MatrixBase<VectorType> &v) - { return finished(double(v.squaredNorm())); } - -}; - -} // end namespace Eigen - -#endif // EIGEN_ITERATION_CONTROLLER_H diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h deleted file mode 100644 index 256990c..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h +++ /dev/null @@ -1,289 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu> -// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_MINRES_H_ -#define EIGEN_MINRES_H_ - - -namespace Eigen { - - namespace internal { - - /** \internal Low-level MINRES algorithm - * \param mat The matrix A - * \param rhs The right hand side vector b - * \param x On input and initial solution, on output the computed solution. - * \param precond A right preconditioner being able to efficiently solve for an - * approximation of Ax=b (regardless of b) - * \param iters On input the max number of iteration, on output the number of performed iterations. 
- * \param tol_error On input the tolerance error, on output an estimation of the relative error. - */ - template<typename MatrixType, typename Rhs, typename Dest, typename Preconditioner> - EIGEN_DONT_INLINE - void minres(const MatrixType& mat, const Rhs& rhs, Dest& x, - const Preconditioner& precond, Index& iters, - typename Dest::RealScalar& tol_error) - { - using std::sqrt; - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - typedef Matrix<Scalar,Dynamic,1> VectorType; - - // Check for zero rhs - const RealScalar rhsNorm2(rhs.squaredNorm()); - if(rhsNorm2 == 0) - { - x.setZero(); - iters = 0; - tol_error = 0; - return; - } - - // initialize - const Index maxIters(iters); // initialize maxIters to iters - const Index N(mat.cols()); // the size of the matrix - const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2) - - // Initialize preconditioned Lanczos - VectorType v_old(N); // will be initialized inside loop - VectorType v( VectorType::Zero(N) ); //initialize v - VectorType v_new(rhs-mat*x); //initialize v_new - RealScalar residualNorm2(v_new.squaredNorm()); - VectorType w(N); // will be initialized inside loop - VectorType w_new(precond.solve(v_new)); // initialize w_new -// RealScalar beta; // will be initialized inside loop - RealScalar beta_new2(v_new.dot(w_new)); - eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); - RealScalar beta_new(sqrt(beta_new2)); - const RealScalar beta_one(beta_new); - v_new /= beta_new; - w_new /= beta_new; - // Initialize other variables - RealScalar c(1.0); // the cosine of the Givens rotation - RealScalar c_old(1.0); - RealScalar s(0.0); // the sine of the Givens rotation - RealScalar s_old(0.0); // the sine of the Givens rotation - VectorType p_oold(N); // will be initialized in loop - VectorType p_old(VectorType::Zero(N)); // initialize p_old=0 - VectorType p(p_old); // initialize p=0 - RealScalar eta(1.0); - - iters = 0; // reset iters - while ( iters < maxIters ) - { - // Preconditioned Lanczos - /* Note that there are 4 variants on the Lanczos algorithm. These are - * described in Paige, C. C. (1972). Computational variants of - * the Lanczos method for the eigenproblem. IMA Journal of Applied - * Mathematics, 10(3), 373–381. The current implementation corresponds - * to the case A(2,7) in the paper. It also corresponds to - * algorithm 6.14 in Y. Saad, Iterative Methods for Sparse Linear - * Systems, 2003 p.173. For the preconditioned version see - * A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987). 
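- *
- * Roughly speaking, in the code below v/v_new play the role of the Lanczos
- * vectors, w/w_new are their preconditioned counterparts, and alpha and
- * beta(_new) are the diagonal and off-diagonal entries of the Lanczos
- * tridiagonal matrix. The Givens rotations (c, s) keep an implicit QR
- * factorization of that tridiagonal matrix up to date, which is why only the
- * three update directions p, p_old and p_oold need to be stored.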
- */ - const RealScalar beta(beta_new); - v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter -// const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT - v = v_new; // update - w = w_new; // update -// const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT - v_new.noalias() = mat*w - beta*v_old; // compute v_new - const RealScalar alpha = v_new.dot(w); - v_new -= alpha*v; // overwrite v_new - w_new = precond.solve(v_new); // overwrite w_new - beta_new2 = v_new.dot(w_new); // compute beta_new - eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); - beta_new = sqrt(beta_new2); // compute beta_new - v_new /= beta_new; // overwrite v_new for next iteration - w_new /= beta_new; // overwrite w_new for next iteration - - // Givens rotation - const RealScalar r2 =s*alpha+c*c_old*beta; // s, s_old, c and c_old are still from previous iteration - const RealScalar r3 =s_old*beta; // s, s_old, c and c_old are still from previous iteration - const RealScalar r1_hat=c*alpha-c_old*s*beta; - const RealScalar r1 =sqrt( std::pow(r1_hat,2) + std::pow(beta_new,2) ); - c_old = c; // store for next iteration - s_old = s; // store for next iteration - c=r1_hat/r1; // new cosine - s=beta_new/r1; // new sine - - // Update solution - p_oold = p_old; -// const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT - p_old = p; - p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED? - x += beta_one*c*eta*p; - - /* Update the squared residual. Note that this is the estimated residual. - The real residual |Ax-b|^2 may be slightly larger */ - residualNorm2 *= s*s; - - if ( residualNorm2 < threshold2) - { - break; - } - - eta=-s*eta; // update eta - iters++; // increment iteration number (for output purposes) - } - - /* Compute error. Note that this is the estimated error. The real - error |Ax-b|/|b| may be slightly larger */ - tol_error = std::sqrt(residualNorm2 / rhsNorm2); - } - - } - - template< typename _MatrixType, int _UpLo=Lower, - typename _Preconditioner = IdentityPreconditioner> - class MINRES; - - namespace internal { - - template< typename _MatrixType, int _UpLo, typename _Preconditioner> - struct traits<MINRES<_MatrixType,_UpLo,_Preconditioner> > - { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; - }; - - } - - /** \ingroup IterativeLinearSolvers_Module - * \brief A minimal residual solver for sparse symmetric problems - * - * This class allows to solve for A.x = b sparse linear problems using the MINRES algorithm - * of Paige and Saunders (1975). The sparse matrix A must be symmetric (possibly indefinite). - * The vectors x and b can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower, - * Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner - * - * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() - * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations - * and NumTraits<Scalar>::epsilon() for the tolerance. - * - * This class can be used as the direct solver classes. 
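- * As a small sketch (assuming a symmetric SparseMatrix<double> A with both
- * triangular parts stored, and a filled VectorXd b as in the example below),
- * the considered triangle is chosen through the second template parameter:
- * \code
- * MINRES<SparseMatrix<double>, Lower|Upper> solver;
- * solver.compute(A);
- * VectorXd x = solver.solve(b);
- * \endcode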
Here is a typical usage example: - * \code - * int n = 10000; - * VectorXd x(n), b(n); - * SparseMatrix<double> A(n,n); - * // fill A and b - * MINRES<SparseMatrix<double> > mr; - * mr.compute(A); - * x = mr.solve(b); - * std::cout << "#iterations: " << mr.iterations() << std::endl; - * std::cout << "estimated error: " << mr.error() << std::endl; - * // update b, and solve again - * x = mr.solve(b); - * \endcode - * - * By default the iterations start with x=0 as an initial guess of the solution. - * One can control the start using the solveWithGuess() method. - * - * MINRES can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. - * - * \sa class ConjugateGradient, BiCGSTAB, SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner - */ - template< typename _MatrixType, int _UpLo, typename _Preconditioner> - class MINRES : public IterativeSolverBase<MINRES<_MatrixType,_UpLo,_Preconditioner> > - { - - typedef IterativeSolverBase<MINRES> Base; - using Base::matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; - public: - using Base::_solve_impl; - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - enum {UpLo = _UpLo}; - - public: - - /** Default constructor. */ - MINRES() : Base() {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template<typename MatrixDerived> - explicit MINRES(const EigenBase<MatrixDerived>& A) : Base(A.derived()) {} - - /** Destructor. */ - ~MINRES(){} - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_with_guess_impl(const Rhs& b, Dest& x) const - { - typedef typename Base::MatrixWrapper MatrixWrapper; - typedef typename Base::ActualMatrixType ActualMatrixType; - enum { - TransposeInput = (!MatrixWrapper::MatrixFree) - && (UpLo==(Lower|Upper)) - && (!MatrixType::IsRowMajor) - && (!NumTraits<Scalar>::IsComplex) - }; - typedef typename internal::conditional<TransposeInput,Transpose<const ActualMatrixType>, ActualMatrixType const&>::type RowMajorWrapper; - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(MatrixWrapper::MatrixFree,UpLo==(Lower|Upper)),MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY); - typedef typename internal::conditional<UpLo==(Lower|Upper), - RowMajorWrapper, - typename MatrixWrapper::template ConstSelfAdjointViewReturnType<UpLo>::Type - >::type SelfAdjointWrapper; - - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - RowMajorWrapper row_mat(matrix()); - for(int j=0; j<b.cols(); ++j) - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - typename Dest::ColXpr xj(x,j); - internal::minres(SelfAdjointWrapper(row_mat), b.col(j), xj, - Base::m_preconditioner, m_iterations, m_error); - } - - m_isInitialized = true; - m_info = m_error <= Base::m_tolerance ? 
Success : NoConvergence; - } - - /** \internal */ - template<typename Rhs,typename Dest> - void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const - { - x.setZero(); - _solve_with_guess_impl(b,x.derived()); - } - - protected: - - }; - -} // end namespace Eigen - -#endif // EIGEN_MINRES_H - diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h deleted file mode 100644 index d113e6e..0000000 --- a/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ITERSCALING_H -#define EIGEN_ITERSCALING_H - -namespace Eigen { - -/** - * \ingroup IterativeSolvers_Module - * \brief iterative scaling algorithm to equilibrate rows and column norms in matrices - * - * This class can be used as a preprocessing tool to accelerate the convergence of iterative methods - * - * This feature is useful to limit the pivoting amount during LU/ILU factorization - * The scaling strategy as presented here preserves the symmetry of the problem - * NOTE It is assumed that the matrix does not have empty row or column, - * - * Example with key steps - * \code - * VectorXd x(n), b(n); - * SparseMatrix<double> A; - * // fill A and b; - * IterScaling<SparseMatrix<double> > scal; - * // Compute the left and right scaling vectors. The matrix is equilibrated at output - * scal.computeRef(A); - * // Scale the right hand side - * b = scal.LeftScaling().cwiseProduct(b); - * // Now, solve the equilibrated linear system with any available solver - * - * // Scale back the computed solution - * x = scal.RightScaling().cwiseProduct(x); - * \endcode - * - * \tparam _MatrixType the type of the matrix. It should be a real square sparsematrix - * - * References : D. Ruiz and B. Ucar, A Symmetry Preserving Algorithm for Matrix Scaling, INRIA Research report RR-7552 - * - * \sa \ref IncompleteLUT - */ -template<typename _MatrixType> -class IterScaling -{ - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - - public: - IterScaling() { init(); } - - IterScaling(const MatrixType& matrix) - { - init(); - compute(matrix); - } - - ~IterScaling() { } - - /** - * Compute the left and right diagonal matrices to scale the input matrix @p mat - * - * FIXME This algorithm will be modified such that the diagonal elements are permuted on the diagonal. 
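- *
- * Each sweep divides every row and column of the copied matrix by the square
- * root of its current maximum absolute entry, accumulating the factors in the
- * left and right scaling vectors; the iteration stops once all row and column
- * infinity norms are within the tolerance of 1, or after m_maxits sweeps
- * (following the symmetry-preserving algorithm of Ruiz and Ucar cited above).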
- * - * \sa LeftScaling() RightScaling() - */ - void compute (const MatrixType& mat) - { - using std::abs; - int m = mat.rows(); - int n = mat.cols(); - eigen_assert((m>0 && m == n) && "Please give a non - empty matrix"); - m_left.resize(m); - m_right.resize(n); - m_left.setOnes(); - m_right.setOnes(); - m_matrix = mat; - VectorXd Dr, Dc, DrRes, DcRes; // Temporary Left and right scaling vectors - Dr.resize(m); Dc.resize(n); - DrRes.resize(m); DcRes.resize(n); - double EpsRow = 1.0, EpsCol = 1.0; - int its = 0; - do - { // Iterate until the infinite norm of each row and column is approximately 1 - // Get the maximum value in each row and column - Dr.setZero(); Dc.setZero(); - for (int k=0; k<m_matrix.outerSize(); ++k) - { - for (typename MatrixType::InnerIterator it(m_matrix, k); it; ++it) - { - if ( Dr(it.row()) < abs(it.value()) ) - Dr(it.row()) = abs(it.value()); - - if ( Dc(it.col()) < abs(it.value()) ) - Dc(it.col()) = abs(it.value()); - } - } - for (int i = 0; i < m; ++i) - { - Dr(i) = std::sqrt(Dr(i)); - Dc(i) = std::sqrt(Dc(i)); - } - // Save the scaling factors - for (int i = 0; i < m; ++i) - { - m_left(i) /= Dr(i); - m_right(i) /= Dc(i); - } - // Scale the rows and the columns of the matrix - DrRes.setZero(); DcRes.setZero(); - for (int k=0; k<m_matrix.outerSize(); ++k) - { - for (typename MatrixType::InnerIterator it(m_matrix, k); it; ++it) - { - it.valueRef() = it.value()/( Dr(it.row()) * Dc(it.col()) ); - // Accumulate the norms of the row and column vectors - if ( DrRes(it.row()) < abs(it.value()) ) - DrRes(it.row()) = abs(it.value()); - - if ( DcRes(it.col()) < abs(it.value()) ) - DcRes(it.col()) = abs(it.value()); - } - } - DrRes.array() = (1-DrRes.array()).abs(); - EpsRow = DrRes.maxCoeff(); - DcRes.array() = (1-DcRes.array()).abs(); - EpsCol = DcRes.maxCoeff(); - its++; - }while ( (EpsRow >m_tol || EpsCol > m_tol) && (its < m_maxits) ); - m_isInitialized = true; - } - /** Compute the left and right vectors to scale the vectors - * the input matrix is scaled with the computed vectors at output - * - * \sa compute() - */ - void computeRef (MatrixType& mat) - { - compute (mat); - mat = m_matrix; - } - /** Get the vector to scale the rows of the matrix - */ - VectorXd& LeftScaling() - { - return m_left; - } - - /** Get the vector to scale the columns of the matrix - */ - VectorXd& RightScaling() - { - return m_right; - } - - /** Set the tolerance for the convergence of the iterative scaling algorithm - */ - void setTolerance(double tol) - { - m_tol = tol; - } - - protected: - - void init() - { - m_tol = 1e-10; - m_maxits = 5; - m_isInitialized = false; - } - - MatrixType m_matrix; - mutable ComputationInfo m_info; - bool m_isInitialized; - VectorXd m_left; // Left scaling vector - VectorXd m_right; // m_right scaling vector - double m_tol; - int m_maxits; // Maximum number of iterations allowed -}; -} -#endif diff --git a/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h deleted file mode 100644 index 582fa85..0000000 --- a/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ /dev/null @@ -1,305 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Kolja Brix <brix@igpm.rwth-aachen.de> -// Copyright (C) 2011 Andreas Platen <andiplaten@gmx.de> -// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef KRONECKER_TENSOR_PRODUCT_H -#define KRONECKER_TENSOR_PRODUCT_H - -namespace Eigen { - -/*! - * \ingroup KroneckerProduct_Module - * - * \brief The base class of dense and sparse Kronecker product. - * - * \tparam Derived is the derived type. - */ -template<typename Derived> -class KroneckerProductBase : public ReturnByValue<Derived> -{ - private: - typedef typename internal::traits<Derived> Traits; - typedef typename Traits::Scalar Scalar; - - protected: - typedef typename Traits::Lhs Lhs; - typedef typename Traits::Rhs Rhs; - - public: - /*! \brief Constructor. */ - KroneckerProductBase(const Lhs& A, const Rhs& B) - : m_A(A), m_B(B) - {} - - inline Index rows() const { return m_A.rows() * m_B.rows(); } - inline Index cols() const { return m_A.cols() * m_B.cols(); } - - /*! - * This overrides ReturnByValue::coeff because this function is - * efficient enough. - */ - Scalar coeff(Index row, Index col) const - { - return m_A.coeff(row / m_B.rows(), col / m_B.cols()) * - m_B.coeff(row % m_B.rows(), col % m_B.cols()); - } - - /*! - * This overrides ReturnByValue::coeff because this function is - * efficient enough. - */ - Scalar coeff(Index i) const - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); - return m_A.coeff(i / m_A.size()) * m_B.coeff(i % m_A.size()); - } - - protected: - typename Lhs::Nested m_A; - typename Rhs::Nested m_B; -}; - -/*! - * \ingroup KroneckerProduct_Module - * - * \brief Kronecker tensor product helper class for dense matrices - * - * This class is the return value of kroneckerProduct(MatrixBase, - * MatrixBase). Use the function rather than construct this class - * directly to avoid specifying template prarameters. - * - * \tparam Lhs Type of the left-hand side, a matrix expression. - * \tparam Rhs Type of the rignt-hand side, a matrix expression. - */ -template<typename Lhs, typename Rhs> -class KroneckerProduct : public KroneckerProductBase<KroneckerProduct<Lhs,Rhs> > -{ - private: - typedef KroneckerProductBase<KroneckerProduct> Base; - using Base::m_A; - using Base::m_B; - - public: - /*! \brief Constructor. */ - KroneckerProduct(const Lhs& A, const Rhs& B) - : Base(A, B) - {} - - /*! \brief Evaluate the Kronecker tensor product. */ - template<typename Dest> void evalTo(Dest& dst) const; -}; - -/*! - * \ingroup KroneckerProduct_Module - * - * \brief Kronecker tensor product helper class for sparse matrices - * - * If at least one of the operands is a sparse matrix expression, - * then this class is returned and evaluates into a sparse matrix. - * - * This class is the return value of kroneckerProduct(EigenBase, - * EigenBase). Use the function rather than construct this class - * directly to avoid specifying template prarameters. - * - * \tparam Lhs Type of the left-hand side, a matrix expression. - * \tparam Rhs Type of the rignt-hand side, a matrix expression. - */ -template<typename Lhs, typename Rhs> -class KroneckerProductSparse : public KroneckerProductBase<KroneckerProductSparse<Lhs,Rhs> > -{ - private: - typedef KroneckerProductBase<KroneckerProductSparse> Base; - using Base::m_A; - using Base::m_B; - - public: - /*! \brief Constructor. */ - KroneckerProductSparse(const Lhs& A, const Rhs& B) - : Base(A, B) - {} - - /*! \brief Evaluate the Kronecker tensor product. 
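- *
- * The sparse evaluation below first counts the non-zeros of every inner
- * vector of the result (the per-vector counts of the two operands simply
- * multiply), reserves that much space in the destination, and then inserts
- * the products of the individual coefficients block by block.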
*/ - template<typename Dest> void evalTo(Dest& dst) const; -}; - -template<typename Lhs, typename Rhs> -template<typename Dest> -void KroneckerProduct<Lhs,Rhs>::evalTo(Dest& dst) const -{ - const int BlockRows = Rhs::RowsAtCompileTime, - BlockCols = Rhs::ColsAtCompileTime; - const Index Br = m_B.rows(), - Bc = m_B.cols(); - for (Index i=0; i < m_A.rows(); ++i) - for (Index j=0; j < m_A.cols(); ++j) - Block<Dest,BlockRows,BlockCols>(dst,i*Br,j*Bc,Br,Bc) = m_A.coeff(i,j) * m_B; -} - -template<typename Lhs, typename Rhs> -template<typename Dest> -void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const -{ - Index Br = m_B.rows(), Bc = m_B.cols(); - dst.resize(this->rows(), this->cols()); - dst.resizeNonZeros(0); - - // 1 - evaluate the operands if needed: - typedef typename internal::nested_eval<Lhs,Dynamic>::type Lhs1; - typedef typename internal::remove_all<Lhs1>::type Lhs1Cleaned; - const Lhs1 lhs1(m_A); - typedef typename internal::nested_eval<Rhs,Dynamic>::type Rhs1; - typedef typename internal::remove_all<Rhs1>::type Rhs1Cleaned; - const Rhs1 rhs1(m_B); - - // 2 - construct respective iterators - typedef Eigen::InnerIterator<Lhs1Cleaned> LhsInnerIterator; - typedef Eigen::InnerIterator<Rhs1Cleaned> RhsInnerIterator; - - // compute number of non-zeros per innervectors of dst - { - // TODO VectorXi is not necessarily big enough! - VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols()); - for (Index kA=0; kA < m_A.outerSize(); ++kA) - for (LhsInnerIterator itA(lhs1,kA); itA; ++itA) - nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++; - - VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols()); - for (Index kB=0; kB < m_B.outerSize(); ++kB) - for (RhsInnerIterator itB(rhs1,kB); itB; ++itB) - nnzB(Dest::IsRowMajor ? 
itB.row() : itB.col())++; - - Matrix<int,Dynamic,Dynamic,ColMajor> nnzAB = nnzB * nnzA.transpose(); - dst.reserve(VectorXi::Map(nnzAB.data(), nnzAB.size())); - } - - for (Index kA=0; kA < m_A.outerSize(); ++kA) - { - for (Index kB=0; kB < m_B.outerSize(); ++kB) - { - for (LhsInnerIterator itA(lhs1,kA); itA; ++itA) - { - for (RhsInnerIterator itB(rhs1,kB); itB; ++itB) - { - Index i = itA.row() * Br + itB.row(), - j = itA.col() * Bc + itB.col(); - dst.insert(i,j) = itA.value() * itB.value(); - } - } - } - } -} - -namespace internal { - -template<typename _Lhs, typename _Rhs> -struct traits<KroneckerProduct<_Lhs,_Rhs> > -{ - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename ScalarBinaryOpTraits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; - typedef typename promote_index_type<typename Lhs::StorageIndex, typename Rhs::StorageIndex>::type StorageIndex; - - enum { - Rows = size_at_compile_time<traits<Lhs>::RowsAtCompileTime, traits<Rhs>::RowsAtCompileTime>::ret, - Cols = size_at_compile_time<traits<Lhs>::ColsAtCompileTime, traits<Rhs>::ColsAtCompileTime>::ret, - MaxRows = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret, - MaxCols = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret - }; - - typedef Matrix<Scalar,Rows,Cols> ReturnType; -}; - -template<typename _Lhs, typename _Rhs> -struct traits<KroneckerProductSparse<_Lhs,_Rhs> > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename ScalarBinaryOpTraits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar; - typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind, scalar_product_op<typename Lhs::Scalar, typename Rhs::Scalar> >::ret StorageKind; - typedef typename promote_index_type<typename Lhs::StorageIndex, typename Rhs::StorageIndex>::type StorageIndex; - - enum { - LhsFlags = Lhs::Flags, - RhsFlags = Rhs::Flags, - - RowsAtCompileTime = size_at_compile_time<traits<Lhs>::RowsAtCompileTime, traits<Rhs>::RowsAtCompileTime>::ret, - ColsAtCompileTime = size_at_compile_time<traits<Lhs>::ColsAtCompileTime, traits<Rhs>::ColsAtCompileTime>::ret, - MaxRowsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret, - MaxColsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret, - - EvalToRowMajor = (LhsFlags & RhsFlags & RowMajorBit), - RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit), - - Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) - | EvalBeforeNestingBit, - CoeffReadCost = HugeCost - }; - - typedef SparseMatrix<Scalar, 0, StorageIndex> ReturnType; -}; - -} // end namespace internal - -/*! - * \ingroup KroneckerProduct_Module - * - * Computes Kronecker tensor product of two dense matrices - * - * \warning If you want to replace a matrix by its Kronecker product - * with some matrix, do \b NOT do this: - * \code - * A = kroneckerProduct(A,B); // bug!!! 
caused by aliasing effect - * \endcode - * instead, use eval() to work around this: - * \code - * A = kroneckerProduct(A,B).eval(); - * \endcode - * - * \param a Dense matrix a - * \param b Dense matrix b - * \return Kronecker tensor product of a and b - */ -template<typename A, typename B> -KroneckerProduct<A,B> kroneckerProduct(const MatrixBase<A>& a, const MatrixBase<B>& b) -{ - return KroneckerProduct<A, B>(a.derived(), b.derived()); -} - -/*! - * \ingroup KroneckerProduct_Module - * - * Computes Kronecker tensor product of two matrices, at least one of - * which is sparse - * - * \warning If you want to replace a matrix by its Kronecker product - * with some matrix, do \b NOT do this: - * \code - * A = kroneckerProduct(A,B); // bug!!! caused by aliasing effect - * \endcode - * instead, use eval() to work around this: - * \code - * A = kroneckerProduct(A,B).eval(); - * \endcode - * - * \param a Dense/sparse matrix a - * \param b Dense/sparse matrix b - * \return Kronecker tensor product of a and b, stored in a sparse - * matrix - */ -template<typename A, typename B> -KroneckerProductSparse<A,B> kroneckerProduct(const EigenBase<A>& a, const EigenBase<B>& b) -{ - return KroneckerProductSparse<A,B>(a.derived(), b.derived()); -} - -} // end namespace Eigen - -#endif // KRONECKER_TENSOR_PRODUCT_H diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt b/eigen/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt deleted file mode 100644 index ae7984d..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt +++ /dev/null @@ -1,52 +0,0 @@ -Minpack Copyright Notice (1999) University of Chicago. All rights reserved - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. - -3. The end-user documentation included with the -redistribution, if any, must include the following -acknowledgment: - - "This product includes software developed by the - University of Chicago, as Operator of Argonne National - Laboratory. - -Alternately, this acknowledgment may appear in the software -itself, if and wherever such third-party acknowledgments -normally appear. - -4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" -WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE -UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND -THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE -OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY -OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR -USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF -THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) -DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION -UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL -BE CORRECTED. - -5. LIMITATION OF LIABILITY. 
IN NO EVENT WILL THE COPYRIGHT -HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF -ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, -INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF -ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF -PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER -SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT -(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, -EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE -POSSIBILITY OF SUCH LOSS OR DAMAGES. - diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h b/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h deleted file mode 100644 index b75bea2..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h +++ /dev/null @@ -1,84 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// This code initially comes from MINPACK whose original authors are: -// Copyright Jorge More - Argonne National Laboratory -// Copyright Burt Garbow - Argonne National Laboratory -// Copyright Ken Hillstrom - Argonne National Laboratory -// -// This Source Code Form is subject to the terms of the Minpack license -// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. - -#ifndef EIGEN_LMCOVAR_H -#define EIGEN_LMCOVAR_H - -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void covar( - Matrix< Scalar, Dynamic, Dynamic > &r, - const VectorXi& ipvt, - Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) ) -{ - using std::abs; - /* Local variables */ - Index i, j, k, l, ii, jj; - bool sing; - Scalar temp; - - /* Function Body */ - const Index n = r.cols(); - const Scalar tolr = tol * abs(r(0,0)); - Matrix< Scalar, Dynamic, 1 > wa(n); - eigen_assert(ipvt.size()==n); - - /* form the inverse of r in the full upper triangle of r. */ - l = -1; - for (k = 0; k < n; ++k) - if (abs(r(k,k)) > tolr) { - r(k,k) = 1. / r(k,k); - for (j = 0; j <= k-1; ++j) { - temp = r(k,k) * r(j,k); - r(j,k) = 0.; - r.col(k).head(j+1) -= r.col(j).head(j+1) * temp; - } - l = k; - } - - /* form the full upper triangle of the inverse of (r transpose)*r */ - /* in the full upper triangle of r. */ - for (k = 0; k <= l; ++k) { - for (j = 0; j <= k-1; ++j) - r.col(j).head(j+1) += r.col(k).head(j+1) * r(j,k); - r.col(k).head(k+1) *= r(k,k); - } - - /* form the full lower triangle of the covariance matrix */ - /* in the strict lower triangle of r and in wa. */ - for (j = 0; j < n; ++j) { - jj = ipvt[j]; - sing = j > l; - for (i = 0; i <= j; ++i) { - if (sing) - r(i,j) = 0.; - ii = ipvt[i]; - if (ii > jj) - r(ii,jj) = r(i,j); - if (ii < jj) - r(jj,ii) = r(i,j); - } - wa[jj] = r(j,j); - } - - /* symmetrize the covariance matrix in r. */ - r.topLeftCorner(n,n).template triangularView<StrictlyUpper>() = r.topLeftCorner(n,n).transpose(); - r.diagonal() = wa; -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_LMCOVAR_H diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h b/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h deleted file mode 100644 index 25b32ec..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h +++ /dev/null @@ -1,202 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This code initially comes from MINPACK whose original authors are: -// Copyright Jorge More - Argonne National Laboratory -// Copyright Burt Garbow - Argonne National Laboratory -// Copyright Ken Hillstrom - Argonne National Laboratory -// -// This Source Code Form is subject to the terms of the Minpack license -// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. - -#ifndef EIGEN_LMONESTEP_H -#define EIGEN_LMONESTEP_H - -namespace Eigen { - -template<typename FunctorType> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType>::minimizeOneStep(FVectorType &x) -{ - using std::abs; - using std::sqrt; - RealScalar temp, temp1,temp2; - RealScalar ratio; - RealScalar pnorm, xnorm, fnorm1, actred, dirder, prered; - eigen_assert(x.size()==n); // check the caller is not cheating us - - temp = 0.0; xnorm = 0.0; - /* calculate the jacobian matrix. */ - Index df_ret = m_functor.df(x, m_fjac); - if (df_ret<0) - return LevenbergMarquardtSpace::UserAsked; - if (df_ret>0) - // numerical diff, we evaluated the function df_ret times - m_nfev += df_ret; - else m_njev++; - - /* compute the qr factorization of the jacobian. */ - for (int j = 0; j < x.size(); ++j) - m_wa2(j) = m_fjac.col(j).blueNorm(); - QRSolver qrfac(m_fjac); - if(qrfac.info() != Success) { - m_info = NumericalIssue; - return LevenbergMarquardtSpace::ImproperInputParameters; - } - // Make a copy of the first factor with the associated permutation - m_rfactor = qrfac.matrixR(); - m_permutation = (qrfac.colsPermutation()); - - /* on the first iteration and if external scaling is not used, scale according */ - /* to the norms of the columns of the initial jacobian. */ - if (m_iter == 1) { - if (!m_useExternalScaling) - for (Index j = 0; j < n; ++j) - m_diag[j] = (m_wa2[j]==0.)? 1. : m_wa2[j]; - - /* on the first iteration, calculate the norm of the scaled x */ - /* and initialize the step bound m_delta. */ - xnorm = m_diag.cwiseProduct(x).stableNorm(); - m_delta = m_factor * xnorm; - if (m_delta == 0.) - m_delta = m_factor; - } - - /* form (q transpose)*m_fvec and store the first n components in */ - /* m_qtf. */ - m_wa4 = m_fvec; - m_wa4 = qrfac.matrixQ().adjoint() * m_fvec; - m_qtf = m_wa4.head(n); - - /* compute the norm of the scaled gradient. */ - m_gnorm = 0.; - if (m_fnorm != 0.) - for (Index j = 0; j < n; ++j) - if (m_wa2[m_permutation.indices()[j]] != 0.) - m_gnorm = (std::max)(m_gnorm, abs( m_rfactor.col(j).head(j+1).dot(m_qtf.head(j+1)/m_fnorm) / m_wa2[m_permutation.indices()[j]])); - - /* test for convergence of the gradient norm. */ - if (m_gnorm <= m_gtol) { - m_info = Success; - return LevenbergMarquardtSpace::CosinusTooSmall; - } - - /* rescale if necessary. */ - if (!m_useExternalScaling) - m_diag = m_diag.cwiseMax(m_wa2); - - do { - /* determine the levenberg-marquardt parameter. */ - internal::lmpar2(qrfac, m_diag, m_qtf, m_delta, m_par, m_wa1); - - /* store the direction p and x + p. calculate the norm of p. */ - m_wa1 = -m_wa1; - m_wa2 = x + m_wa1; - pnorm = m_diag.cwiseProduct(m_wa1).stableNorm(); - - /* on the first iteration, adjust the initial step bound. */ - if (m_iter == 1) - m_delta = (std::min)(m_delta,pnorm); - - /* evaluate the function at x + p and calculate its norm. */ - if ( m_functor(m_wa2, m_wa4) < 0) - return LevenbergMarquardtSpace::UserAsked; - ++m_nfev; - fnorm1 = m_wa4.stableNorm(); - - /* compute the scaled actual reduction. 
*/ - actred = -1.; - if (Scalar(.1) * fnorm1 < m_fnorm) - actred = 1. - numext::abs2(fnorm1 / m_fnorm); - - /* compute the scaled predicted reduction and */ - /* the scaled directional derivative. */ - m_wa3 = m_rfactor.template triangularView<Upper>() * (m_permutation.inverse() *m_wa1); - temp1 = numext::abs2(m_wa3.stableNorm() / m_fnorm); - temp2 = numext::abs2(sqrt(m_par) * pnorm / m_fnorm); - prered = temp1 + temp2 / Scalar(.5); - dirder = -(temp1 + temp2); - - /* compute the ratio of the actual to the predicted */ - /* reduction. */ - ratio = 0.; - if (prered != 0.) - ratio = actred / prered; - - /* update the step bound. */ - if (ratio <= Scalar(.25)) { - if (actred >= 0.) - temp = RealScalar(.5); - if (actred < 0.) - temp = RealScalar(.5) * dirder / (dirder + RealScalar(.5) * actred); - if (RealScalar(.1) * fnorm1 >= m_fnorm || temp < RealScalar(.1)) - temp = Scalar(.1); - /* Computing MIN */ - m_delta = temp * (std::min)(m_delta, pnorm / RealScalar(.1)); - m_par /= temp; - } else if (!(m_par != 0. && ratio < RealScalar(.75))) { - m_delta = pnorm / RealScalar(.5); - m_par = RealScalar(.5) * m_par; - } - - /* test for successful iteration. */ - if (ratio >= RealScalar(1e-4)) { - /* successful iteration. update x, m_fvec, and their norms. */ - x = m_wa2; - m_wa2 = m_diag.cwiseProduct(x); - m_fvec = m_wa4; - xnorm = m_wa2.stableNorm(); - m_fnorm = fnorm1; - ++m_iter; - } - - /* tests for convergence. */ - if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1. && m_delta <= m_xtol * xnorm) - { - m_info = Success; - return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; - } - if (abs(actred) <= m_ftol && prered <= m_ftol && Scalar(.5) * ratio <= 1.) - { - m_info = Success; - return LevenbergMarquardtSpace::RelativeReductionTooSmall; - } - if (m_delta <= m_xtol * xnorm) - { - m_info = Success; - return LevenbergMarquardtSpace::RelativeErrorTooSmall; - } - - /* tests for termination and stringent tolerances. */ - if (m_nfev >= m_maxfev) - { - m_info = NoConvergence; - return LevenbergMarquardtSpace::TooManyFunctionEvaluation; - } - if (abs(actred) <= NumTraits<Scalar>::epsilon() && prered <= NumTraits<Scalar>::epsilon() && Scalar(.5) * ratio <= 1.) - { - m_info = Success; - return LevenbergMarquardtSpace::FtolTooSmall; - } - if (m_delta <= NumTraits<Scalar>::epsilon() * xnorm) - { - m_info = Success; - return LevenbergMarquardtSpace::XtolTooSmall; - } - if (m_gnorm <= NumTraits<Scalar>::epsilon()) - { - m_info = Success; - return LevenbergMarquardtSpace::GtolTooSmall; - } - - } while (ratio < Scalar(1e-4)); - - return LevenbergMarquardtSpace::Running; -} - - -} // end namespace Eigen - -#endif // EIGEN_LMONESTEP_H diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h b/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h deleted file mode 100644 index 9a48365..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h +++ /dev/null @@ -1,160 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// This code initially comes from MINPACK whose original authors are: -// Copyright Jorge More - Argonne National Laboratory -// Copyright Burt Garbow - Argonne National Laboratory -// Copyright Ken Hillstrom - Argonne National Laboratory -// -// This Source Code Form is subject to the terms of the Minpack license -// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. 
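-
-// The routine internal::lmpar2() below determines the Levenberg-Marquardt
-// parameter 'par' in the spirit of MINPACK's lmpar: given the QR factorization
-// of the Jacobian, the scaling vector diag and the trust-region radius
-// m_delta, it looks for par >= 0 such that the damped least-squares step x
-// (the minimizer of |J*x - b|^2 + par * |diag.cwiseProduct(x)|^2) has a scaled
-// norm |diag.cwiseProduct(x)| within roughly ten percent of m_delta, with
-// par = 0 whenever the plain Gauss-Newton step already lies inside the trust
-// region.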
- -#ifndef EIGEN_LMPAR_H -#define EIGEN_LMPAR_H - -namespace Eigen { - -namespace internal { - - template <typename QRSolver, typename VectorType> - void lmpar2( - const QRSolver &qr, - const VectorType &diag, - const VectorType &qtb, - typename VectorType::Scalar m_delta, - typename VectorType::Scalar &par, - VectorType &x) - - { - using std::sqrt; - using std::abs; - typedef typename QRSolver::MatrixType MatrixType; - typedef typename QRSolver::Scalar Scalar; -// typedef typename QRSolver::StorageIndex StorageIndex; - - /* Local variables */ - Index j; - Scalar fp; - Scalar parc, parl; - Index iter; - Scalar temp, paru; - Scalar gnorm; - Scalar dxnorm; - - // Make a copy of the triangular factor. - // This copy is modified during call the qrsolv - MatrixType s; - s = qr.matrixR(); - - /* Function Body */ - const Scalar dwarf = (std::numeric_limits<Scalar>::min)(); - const Index n = qr.matrixR().cols(); - eigen_assert(n==diag.size()); - eigen_assert(n==qtb.size()); - - VectorType wa1, wa2; - - /* compute and store in x the gauss-newton direction. if the */ - /* jacobian is rank-deficient, obtain a least squares solution. */ - - // const Index rank = qr.nonzeroPivots(); // exactly double(0.) - const Index rank = qr.rank(); // use a threshold - wa1 = qtb; - wa1.tail(n-rank).setZero(); - //FIXME There is no solve in place for sparse triangularView - wa1.head(rank) = s.topLeftCorner(rank,rank).template triangularView<Upper>().solve(qtb.head(rank)); - - x = qr.colsPermutation()*wa1; - - /* initialize the iteration counter. */ - /* evaluate the function at the origin, and test */ - /* for acceptance of the gauss-newton direction. */ - iter = 0; - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - fp = dxnorm - m_delta; - if (fp <= Scalar(0.1) * m_delta) { - par = 0; - return; - } - - /* if the jacobian is not rank deficient, the newton */ - /* step provides a lower bound, parl, for the zero of */ - /* the function. otherwise set this bound to zero. */ - parl = 0.; - if (rank==n) { - wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2)/dxnorm; - s.topLeftCorner(n,n).transpose().template triangularView<Lower>().solveInPlace(wa1); - temp = wa1.blueNorm(); - parl = fp / m_delta / temp / temp; - } - - /* calculate an upper bound, paru, for the zero of the function. */ - for (j = 0; j < n; ++j) - wa1[j] = s.col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)]; - - gnorm = wa1.stableNorm(); - paru = gnorm / m_delta; - if (paru == 0.) - paru = dwarf / (std::min)(m_delta,Scalar(0.1)); - - /* if the input par lies outside of the interval (parl,paru), */ - /* set par to the closer endpoint. */ - par = (std::max)(par,parl); - par = (std::min)(par,paru); - if (par == 0.) - par = gnorm / dxnorm; - - /* beginning of an iteration. */ - while (true) { - ++iter; - - /* evaluate the function at the current value of par. */ - if (par == 0.) - par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ - wa1 = sqrt(par)* diag; - - VectorType sdiag(n); - lmqrsolv(s, qr.colsPermutation(), wa1, qtb, x, sdiag); - - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - temp = fp; - fp = dxnorm - m_delta; - - /* if the function is small enough, accept the current value */ - /* of par. also test for the exceptional cases where parl */ - /* is zero or the number of iterations has reached 10. */ - if (abs(fp) <= Scalar(0.1) * m_delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) - break; - - /* compute the newton correction. 
*/ - wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2/dxnorm); - // we could almost use this here, but the diagonal is outside qr, in sdiag[] - for (j = 0; j < n; ++j) { - wa1[j] /= sdiag[j]; - temp = wa1[j]; - for (Index i = j+1; i < n; ++i) - wa1[i] -= s.coeff(i,j) * temp; - } - temp = wa1.blueNorm(); - parc = fp / m_delta / temp / temp; - - /* depending on the sign of the function, update parl or paru. */ - if (fp > 0.) - parl = (std::max)(parl,par); - if (fp < 0.) - paru = (std::min)(paru,par); - - /* compute an improved estimate for par. */ - par = (std::max)(parl,par+parc); - } - if (iter == 0) - par = 0.; - return; - } -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_LMPAR_H diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h deleted file mode 100644 index ae9d793..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h +++ /dev/null @@ -1,188 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr> -// -// This code initially comes from MINPACK whose original authors are: -// Copyright Jorge More - Argonne National Laboratory -// Copyright Burt Garbow - Argonne National Laboratory -// Copyright Ken Hillstrom - Argonne National Laboratory -// -// This Source Code Form is subject to the terms of the Minpack license -// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. - -#ifndef EIGEN_LMQRSOLV_H -#define EIGEN_LMQRSOLV_H - -namespace Eigen { - -namespace internal { - -template <typename Scalar,int Rows, int Cols, typename PermIndex> -void lmqrsolv( - Matrix<Scalar,Rows,Cols> &s, - const PermutationMatrix<Dynamic,Dynamic,PermIndex> &iPerm, - const Matrix<Scalar,Dynamic,1> &diag, - const Matrix<Scalar,Dynamic,1> &qtb, - Matrix<Scalar,Dynamic,1> &x, - Matrix<Scalar,Dynamic,1> &sdiag) -{ - /* Local variables */ - Index i, j, k; - Scalar temp; - Index n = s.cols(); - Matrix<Scalar,Dynamic,1> wa(n); - JacobiRotation<Scalar> givens; - - /* Function Body */ - // the following will only change the lower triangular part of s, including - // the diagonal, though the diagonal is restored afterward - - /* copy r and (q transpose)*b to preserve input and initialize s. */ - /* in particular, save the diagonal elements of r in x. */ - x = s.diagonal(); - wa = qtb; - - - s.topLeftCorner(n,n).template triangularView<StrictlyLower>() = s.topLeftCorner(n,n).transpose(); - /* eliminate the diagonal matrix d using a givens rotation. */ - for (j = 0; j < n; ++j) { - - /* prepare the row of d to be eliminated, locating the */ - /* diagonal element using p from the qr factorization. */ - const PermIndex l = iPerm.indices()(j); - if (diag[l] == 0.) - break; - sdiag.tail(n-j).setZero(); - sdiag[j] = diag[l]; - - /* the transformations to eliminate the row of d */ - /* modify only a single element of (q transpose)*b */ - /* beyond the first n, which is initially zero. */ - Scalar qtbpj = 0.; - for (k = j; k < n; ++k) { - /* determine a givens rotation which eliminates the */ - /* appropriate element in the current row of d. */ - givens.makeGivens(-s(k,k), sdiag[k]); - - /* compute the modified diagonal element of r and */ - /* the modified element of ((q transpose)*b,0). 
*/ - s(k,k) = givens.c() * s(k,k) + givens.s() * sdiag[k]; - temp = givens.c() * wa[k] + givens.s() * qtbpj; - qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; - wa[k] = temp; - - /* accumulate the tranformation in the row of s. */ - for (i = k+1; i<n; ++i) { - temp = givens.c() * s(i,k) + givens.s() * sdiag[i]; - sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i]; - s(i,k) = temp; - } - } - } - - /* solve the triangular system for z. if the system is */ - /* singular, then obtain a least squares solution. */ - Index nsing; - for(nsing=0; nsing<n && sdiag[nsing]!=0; nsing++) {} - - wa.tail(n-nsing).setZero(); - s.topLeftCorner(nsing, nsing).transpose().template triangularView<Upper>().solveInPlace(wa.head(nsing)); - - // restore - sdiag = s.diagonal(); - s.diagonal() = x; - - /* permute the components of z back to components of x. */ - x = iPerm * wa; -} - -template <typename Scalar, int _Options, typename Index> -void lmqrsolv( - SparseMatrix<Scalar,_Options,Index> &s, - const PermutationMatrix<Dynamic,Dynamic> &iPerm, - const Matrix<Scalar,Dynamic,1> &diag, - const Matrix<Scalar,Dynamic,1> &qtb, - Matrix<Scalar,Dynamic,1> &x, - Matrix<Scalar,Dynamic,1> &sdiag) -{ - /* Local variables */ - typedef SparseMatrix<Scalar,RowMajor,Index> FactorType; - Index i, j, k, l; - Scalar temp; - Index n = s.cols(); - Matrix<Scalar,Dynamic,1> wa(n); - JacobiRotation<Scalar> givens; - - /* Function Body */ - // the following will only change the lower triangular part of s, including - // the diagonal, though the diagonal is restored afterward - - /* copy r and (q transpose)*b to preserve input and initialize R. */ - wa = qtb; - FactorType R(s); - // Eliminate the diagonal matrix d using a givens rotation - for (j = 0; j < n; ++j) - { - // Prepare the row of d to be eliminated, locating the - // diagonal element using p from the qr factorization - l = iPerm.indices()(j); - if (diag(l) == Scalar(0)) - break; - sdiag.tail(n-j).setZero(); - sdiag[j] = diag[l]; - // the transformations to eliminate the row of d - // modify only a single element of (q transpose)*b - // beyond the first n, which is initially zero. - - Scalar qtbpj = 0; - // Browse the nonzero elements of row j of the upper triangular s - for (k = j; k < n; ++k) - { - typename FactorType::InnerIterator itk(R,k); - for (; itk; ++itk){ - if (itk.index() < k) continue; - else break; - } - //At this point, we have the diagonal element R(k,k) - // Determine a givens rotation which eliminates - // the appropriate element in the current row of d - givens.makeGivens(-itk.value(), sdiag(k)); - - // Compute the modified diagonal element of r and - // the modified element of ((q transpose)*b,0). - itk.valueRef() = givens.c() * itk.value() + givens.s() * sdiag(k); - temp = givens.c() * wa(k) + givens.s() * qtbpj; - qtbpj = -givens.s() * wa(k) + givens.c() * qtbpj; - wa(k) = temp; - - // Accumulate the transformation in the remaining k row/column of R - for (++itk; itk; ++itk) - { - i = itk.index(); - temp = givens.c() * itk.value() + givens.s() * sdiag(i); - sdiag(i) = -givens.s() * itk.value() + givens.c() * sdiag(i); - itk.valueRef() = temp; - } - } - } - - // Solve the triangular system for z. 
If the system is - // singular, then obtain a least squares solution - Index nsing; - for(nsing = 0; nsing<n && sdiag(nsing) !=0; nsing++) {} - - wa.tail(n-nsing).setZero(); -// x = wa; - wa.head(nsing) = R.topLeftCorner(nsing,nsing).template triangularView<Upper>().solve/*InPlace*/(wa.head(nsing)); - - sdiag = R.diagonal(); - // Permute the components of z back to components of x - x = iPerm * wa; -} -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_LMQRSOLV_H diff --git a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h b/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h deleted file mode 100644 index 9954279..0000000 --- a/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h +++ /dev/null @@ -1,396 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// Copyright (C) 2012 Desire Nuentsa <desire.nuentsa_wakam@inria.fr> -// -// The algorithm of this class initially comes from MINPACK whose original authors are: -// Copyright Jorge More - Argonne National Laboratory -// Copyright Burt Garbow - Argonne National Laboratory -// Copyright Ken Hillstrom - Argonne National Laboratory -// -// This Source Code Form is subject to the terms of the Minpack license -// (a BSD-like license) described in the campaigned CopyrightMINPACK.txt file. -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LEVENBERGMARQUARDT_H -#define EIGEN_LEVENBERGMARQUARDT_H - - -namespace Eigen { -namespace LevenbergMarquardtSpace { - enum Status { - NotStarted = -2, - Running = -1, - ImproperInputParameters = 0, - RelativeReductionTooSmall = 1, - RelativeErrorTooSmall = 2, - RelativeErrorAndReductionTooSmall = 3, - CosinusTooSmall = 4, - TooManyFunctionEvaluation = 5, - FtolTooSmall = 6, - XtolTooSmall = 7, - GtolTooSmall = 8, - UserAsked = 9 - }; -} - -template <typename _Scalar, int NX=Dynamic, int NY=Dynamic> -struct DenseFunctor -{ - typedef _Scalar Scalar; - enum { - InputsAtCompileTime = NX, - ValuesAtCompileTime = NY - }; - typedef Matrix<Scalar,InputsAtCompileTime,1> InputType; - typedef Matrix<Scalar,ValuesAtCompileTime,1> ValueType; - typedef Matrix<Scalar,ValuesAtCompileTime,InputsAtCompileTime> JacobianType; - typedef ColPivHouseholderQR<JacobianType> QRSolver; - const int m_inputs, m_values; - - DenseFunctor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {} - DenseFunctor(int inputs, int values) : m_inputs(inputs), m_values(values) {} - - int inputs() const { return m_inputs; } - int values() const { return m_values; } - - //int operator()(const InputType &x, ValueType& fvec) { } - // should be defined in derived classes - - //int df(const InputType &x, JacobianType& fjac) { } - // should be defined in derived classes -}; - -template <typename _Scalar, typename _Index> -struct SparseFunctor -{ - typedef _Scalar Scalar; - typedef _Index Index; - typedef Matrix<Scalar,Dynamic,1> InputType; - typedef Matrix<Scalar,Dynamic,1> ValueType; - typedef SparseMatrix<Scalar, ColMajor, Index> JacobianType; - typedef SparseQR<JacobianType, COLAMDOrdering<int> > QRSolver; - enum { - InputsAtCompileTime = Dynamic, - ValuesAtCompileTime = Dynamic - }; - - SparseFunctor(int inputs, int values) : m_inputs(inputs), m_values(values) {} - - int inputs() 
const { return m_inputs; } - int values() const { return m_values; } - - const int m_inputs, m_values; - //int operator()(const InputType &x, ValueType& fvec) { } - // to be defined in the functor - - //int df(const InputType &x, JacobianType& fjac) { } - // to be defined in the functor if no automatic differentiation - -}; -namespace internal { -template <typename QRSolver, typename VectorType> -void lmpar2(const QRSolver &qr, const VectorType &diag, const VectorType &qtb, - typename VectorType::Scalar m_delta, typename VectorType::Scalar &par, - VectorType &x); - } -/** - * \ingroup NonLinearOptimization_Module - * \brief Performs non linear optimization over a non-linear function, - * using a variant of the Levenberg Marquardt algorithm. - * - * Check wikipedia for more information. - * http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm - */ -template<typename _FunctorType> -class LevenbergMarquardt : internal::no_assignment_operator -{ - public: - typedef _FunctorType FunctorType; - typedef typename FunctorType::QRSolver QRSolver; - typedef typename FunctorType::JacobianType JacobianType; - typedef typename JacobianType::Scalar Scalar; - typedef typename JacobianType::RealScalar RealScalar; - typedef typename QRSolver::StorageIndex PermIndex; - typedef Matrix<Scalar,Dynamic,1> FVectorType; - typedef PermutationMatrix<Dynamic,Dynamic> PermutationType; - public: - LevenbergMarquardt(FunctorType& functor) - : m_functor(functor),m_nfev(0),m_njev(0),m_fnorm(0.0),m_gnorm(0), - m_isInitialized(false),m_info(InvalidInput) - { - resetParameters(); - m_useExternalScaling=false; - } - - LevenbergMarquardtSpace::Status minimize(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeInit(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeOneStep(FVectorType &x); - LevenbergMarquardtSpace::Status lmder1( - FVectorType &x, - const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) - ); - static LevenbergMarquardtSpace::Status lmdif1( - FunctorType &functor, - FVectorType &x, - Index *nfev, - const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) - ); - - /** Sets the default parameters */ - void resetParameters() - { - using std::sqrt; - - m_factor = 100.; - m_maxfev = 400; - m_ftol = sqrt(NumTraits<RealScalar>::epsilon()); - m_xtol = sqrt(NumTraits<RealScalar>::epsilon()); - m_gtol = 0. ; - m_epsfcn = 0. ; - } - - /** Sets the tolerance for the norm of the solution vector*/ - void setXtol(RealScalar xtol) { m_xtol = xtol; } - - /** Sets the tolerance for the norm of the vector function*/ - void setFtol(RealScalar ftol) { m_ftol = ftol; } - - /** Sets the tolerance for the norm of the gradient of the error vector*/ - void setGtol(RealScalar gtol) { m_gtol = gtol; } - - /** Sets the step bound for the diagonal shift */ - void setFactor(RealScalar factor) { m_factor = factor; } - - /** Sets the error precision */ - void setEpsilon (RealScalar epsfcn) { m_epsfcn = epsfcn; } - - /** Sets the maximum number of function evaluation */ - void setMaxfev(Index maxfev) {m_maxfev = maxfev; } - - /** Use an external Scaling. 
If set to true, pass a nonzero diagonal to diag() */ - void setExternalScaling(bool value) {m_useExternalScaling = value; } - - /** \returns the tolerance for the norm of the solution vector */ - RealScalar xtol() const {return m_xtol; } - - /** \returns the tolerance for the norm of the vector function */ - RealScalar ftol() const {return m_ftol; } - - /** \returns the tolerance for the norm of the gradient of the error vector */ - RealScalar gtol() const {return m_gtol; } - - /** \returns the step bound for the diagonal shift */ - RealScalar factor() const {return m_factor; } - - /** \returns the error precision */ - RealScalar epsilon() const {return m_epsfcn; } - - /** \returns the maximum number of function evaluation */ - Index maxfev() const {return m_maxfev; } - - /** \returns a reference to the diagonal of the jacobian */ - FVectorType& diag() {return m_diag; } - - /** \returns the number of iterations performed */ - Index iterations() { return m_iter; } - - /** \returns the number of functions evaluation */ - Index nfev() { return m_nfev; } - - /** \returns the number of jacobian evaluation */ - Index njev() { return m_njev; } - - /** \returns the norm of current vector function */ - RealScalar fnorm() {return m_fnorm; } - - /** \returns the norm of the gradient of the error */ - RealScalar gnorm() {return m_gnorm; } - - /** \returns the LevenbergMarquardt parameter */ - RealScalar lm_param(void) { return m_par; } - - /** \returns a reference to the current vector function - */ - FVectorType& fvec() {return m_fvec; } - - /** \returns a reference to the matrix where the current Jacobian matrix is stored - */ - JacobianType& jacobian() {return m_fjac; } - - /** \returns a reference to the triangular matrix R from the QR of the jacobian matrix. 
- * \sa jacobian() - */ - JacobianType& matrixR() {return m_rfactor; } - - /** the permutation used in the QR factorization - */ - PermutationType permutation() {return m_permutation; } - - /** - * \brief Reports whether the minimization was successful - * \returns \c Success if the minimization was succesful, - * \c NumericalIssue if a numerical problem arises during the - * minimization process, for exemple during the QR factorization - * \c NoConvergence if the minimization did not converge after - * the maximum number of function evaluation allowed - * \c InvalidInput if the input matrix is invalid - */ - ComputationInfo info() const - { - - return m_info; - } - private: - JacobianType m_fjac; - JacobianType m_rfactor; // The triangular matrix R from the QR of the jacobian matrix m_fjac - FunctorType &m_functor; - FVectorType m_fvec, m_qtf, m_diag; - Index n; - Index m; - Index m_nfev; - Index m_njev; - RealScalar m_fnorm; // Norm of the current vector function - RealScalar m_gnorm; //Norm of the gradient of the error - RealScalar m_factor; // - Index m_maxfev; // Maximum number of function evaluation - RealScalar m_ftol; //Tolerance in the norm of the vector function - RealScalar m_xtol; // - RealScalar m_gtol; //tolerance of the norm of the error gradient - RealScalar m_epsfcn; // - Index m_iter; // Number of iterations performed - RealScalar m_delta; - bool m_useExternalScaling; - PermutationType m_permutation; - FVectorType m_wa1, m_wa2, m_wa3, m_wa4; //Temporary vectors - RealScalar m_par; - bool m_isInitialized; // Check whether the minimization step has been called - ComputationInfo m_info; -}; - -template<typename FunctorType> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType>::minimize(FVectorType &x) -{ - LevenbergMarquardtSpace::Status status = minimizeInit(x); - if (status==LevenbergMarquardtSpace::ImproperInputParameters) { - m_isInitialized = true; - return status; - } - do { -// std::cout << " uv " << x.transpose() << "\n"; - status = minimizeOneStep(x); - } while (status==LevenbergMarquardtSpace::Running); - m_isInitialized = true; - return status; -} - -template<typename FunctorType> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType>::minimizeInit(FVectorType &x) -{ - n = x.size(); - m = m_functor.values(); - - m_wa1.resize(n); m_wa2.resize(n); m_wa3.resize(n); - m_wa4.resize(m); - m_fvec.resize(m); - //FIXME Sparse Case : Allocate space for the jacobian - m_fjac.resize(m, n); -// m_fjac.reserve(VectorXi::Constant(n,5)); // FIXME Find a better alternative - if (!m_useExternalScaling) - m_diag.resize(n); - eigen_assert( (!m_useExternalScaling || m_diag.size()==n) && "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'"); - m_qtf.resize(n); - - /* Function Body */ - m_nfev = 0; - m_njev = 0; - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || m_ftol < 0. || m_xtol < 0. || m_gtol < 0. || m_maxfev <= 0 || m_factor <= 0.){ - m_info = InvalidInput; - return LevenbergMarquardtSpace::ImproperInputParameters; - } - - if (m_useExternalScaling) - for (Index j = 0; j < n; ++j) - if (m_diag[j] <= 0.) - { - m_info = InvalidInput; - return LevenbergMarquardtSpace::ImproperInputParameters; - } - - /* evaluate the function at the starting point */ - /* and calculate its norm. */ - m_nfev = 1; - if ( m_functor(x, m_fvec) < 0) - return LevenbergMarquardtSpace::UserAsked; - m_fnorm = m_fvec.stableNorm(); - - /* initialize levenberg-marquardt parameter and iteration counter. 
*/ - m_par = 0.; - m_iter = 1; - - return LevenbergMarquardtSpace::NotStarted; -} - -template<typename FunctorType> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType>::lmder1( - FVectorType &x, - const Scalar tol - ) -{ - n = x.size(); - m = m_functor.values(); - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || tol < 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - resetParameters(); - m_ftol = tol; - m_xtol = tol; - m_maxfev = 100*(n+1); - - return minimize(x); -} - - -template<typename FunctorType> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType>::lmdif1( - FunctorType &functor, - FVectorType &x, - Index *nfev, - const Scalar tol - ) -{ - Index n = x.size(); - Index m = functor.values(); - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || tol < 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - NumericalDiff<FunctorType> numDiff(functor); - // embedded LevenbergMarquardt - LevenbergMarquardt<NumericalDiff<FunctorType> > lm(numDiff); - lm.setFtol(tol); - lm.setXtol(tol); - lm.setMaxfev(200*(n+1)); - - LevenbergMarquardtSpace::Status info = LevenbergMarquardtSpace::Status(lm.minimize(x)); - if (nfev) - * nfev = lm.nfev(); - return info; -} - -} // end namespace Eigen - -#endif // EIGEN_LEVENBERGMARQUARDT_H diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h deleted file mode 100644 index e5ebbcf..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h +++ /dev/null @@ -1,442 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009, 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// Copyright (C) 2011, 2013 Chen-Pang He <jdh8@ms63.hinet.net> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_EXPONENTIAL -#define EIGEN_MATRIX_EXPONENTIAL - -#include "StemFunction.h" - -namespace Eigen { -namespace internal { - -/** \brief Scaling operator. - * - * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$. - */ -template <typename RealScalar> -struct MatrixExponentialScalingOp -{ - /** \brief Constructor. - * - * \param[in] squarings The integer \f$ s \f$ in this document. - */ - MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) { } - - - /** \brief Scale a matrix coefficient. - * - * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. - */ - inline const RealScalar operator() (const RealScalar& x) const - { - using std::ldexp; - return ldexp(x, -m_squarings); - } - - typedef std::complex<RealScalar> ComplexScalar; - - /** \brief Scale a matrix coefficient. - * - * \param[in,out] x The scalar to be scaled, becoming \f$ 2^{-s} x \f$. - */ - inline const ComplexScalar operator() (const ComplexScalar& x) const - { - using std::ldexp; - return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings)); - } - - private: - int m_squarings; -}; - -/** \brief Compute the (3,3)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. 
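// A minimal usage sketch of the LevenbergMarquardt<> driver and DenseFunctor<> shown above
// (not part of the deleted sources): derive a functor providing operator() (residuals) and
// df() (Jacobian), then call minimize(). The straight-line model and data are hypothetical.
#include <Eigen/Dense>
#include <unsupported/Eigen/LevenbergMarquardt>
#include <iostream>

struct LineFitFunctor : Eigen::DenseFunctor<double>
{
  Eigen::VectorXd t, y;
  LineFitFunctor(const Eigen::VectorXd &t_, const Eigen::VectorXd &y_)
    : Eigen::DenseFunctor<double>(2, int(t_.size())), t(t_), y(y_) {}

  // residuals: f_i(x) = x0*t_i + x1 - y_i
  int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const
  {
    fvec = (x(0) * t.array() + x(1) - y.array()).matrix();
    return 0;
  }
  // Jacobian: df_i/dx0 = t_i, df_i/dx1 = 1
  int df(const Eigen::VectorXd &, Eigen::MatrixXd &fjac) const
  {
    fjac.col(0) = t;
    fjac.col(1).setOnes();
    return 0;
  }
};

int main()
{
  Eigen::VectorXd t(4), y(4);
  t << 0, 1, 2, 3;
  y << 1.1, 2.9, 5.2, 6.8;                      // made-up data, roughly y = 2*t + 1
  LineFitFunctor functor(t, y);
  Eigen::LevenbergMarquardt<LineFitFunctor> lm(functor);
  Eigen::VectorXd x(2);
  x << 0., 0.;                                  // initial guess
  lm.minimize(x);
  std::cout << "slope " << x(0) << ", intercept " << x(1) << "\n";
  return 0;
}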
- */ -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade3(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatA>::Scalar>::Real RealScalar; - const RealScalar b[] = {120.L, 60.L, 12.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType tmp = b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - V = b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); -} - -/** \brief Compute the (5,5)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. - */ -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade5(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - const RealScalar b[] = {30240.L, 15120.L, 3360.L, 420.L, 30.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType A4 = A2 * A2; - const MatrixType tmp = b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - V = b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); -} - -/** \brief Compute the (7,7)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. - */ -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade7(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - const RealScalar b[] = {17297280.L, 8648640.L, 1995840.L, 277200.L, 25200.L, 1512.L, 56.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType A4 = A2 * A2; - const MatrixType A6 = A4 * A2; - const MatrixType tmp = b[7] * A6 + b[5] * A4 + b[3] * A2 - + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - V = b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); - -} - -/** \brief Compute the (9,9)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. - */ -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade9(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - const RealScalar b[] = {17643225600.L, 8821612800.L, 2075673600.L, 302702400.L, 30270240.L, - 2162160.L, 110880.L, 3960.L, 90.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType A4 = A2 * A2; - const MatrixType A6 = A4 * A2; - const MatrixType A8 = A6 * A2; - const MatrixType tmp = b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 - + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - V = b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); -} - -/** \brief Compute the (13,13)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. 
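// For intuition (editorial sketch, not part of the deleted sources): matrix_exp_pade3() above
// splits the (3,3) Pade approximant of exp into an odd part U and an even part V, so that
// exp(A) is approximated by (V+U)(V-U)^{-1}. For a 1x1 matrix this reduces to the scalar
// rational approximation (120 + 60x + 12x^2 + x^3) / (120 - 60x + 12x^2 - x^3):
#include <cmath>
#include <cstdio>

double exp_pade3(double x)
{
  const double b[] = {120., 60., 12., 1.};      // same coefficients as matrix_exp_pade3
  const double x2 = x * x;
  const double U = x * (b[3] * x2 + b[1]);      // odd powers
  const double V = b[2] * x2 + b[0];            // even powers
  return (V + U) / (V - U);
}

int main()
{
  const double x = 0.1;                         // small argument, where the approximant is accurate
  std::printf("pade3 %.12f  exp %.12f\n", exp_pade3(x), std::exp(x));
  return 0;
}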
- */ -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade13(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - const RealScalar b[] = {64764752532480000.L, 32382376266240000.L, 7771770303897600.L, - 1187353796428800.L, 129060195264000.L, 10559470521600.L, 670442572800.L, - 33522128640.L, 1323241920.L, 40840800.L, 960960.L, 16380.L, 182.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType A4 = A2 * A2; - const MatrixType A6 = A4 * A2; - V = b[13] * A6 + b[11] * A4 + b[9] * A2; // used for temporary storage - MatrixType tmp = A6 * V; - tmp += b[7] * A6 + b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - tmp = b[12] * A6 + b[10] * A4 + b[8] * A2; - V.noalias() = A6 * tmp; - V += b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols()); -} - -/** \brief Compute the (17,17)-Padé approximant to the exponential. - * - * After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Padé - * approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$. - * - * This function activates only if your long double is double-double or quadruple. - */ -#if LDBL_MANT_DIG > 64 -template <typename MatA, typename MatU, typename MatV> -void matrix_exp_pade17(const MatA& A, MatU& U, MatV& V) -{ - typedef typename MatA::PlainObject MatrixType; - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - const RealScalar b[] = {830034394580628357120000.L, 415017197290314178560000.L, - 100610229646136770560000.L, 15720348382208870400000.L, - 1774878043152614400000.L, 153822763739893248000.L, 10608466464820224000.L, - 595373117923584000.L, 27563570274240000.L, 1060137318240000.L, - 33924394183680.L, 899510451840.L, 19554575040.L, 341863200.L, 4651200.L, - 46512.L, 306.L, 1.L}; - const MatrixType A2 = A * A; - const MatrixType A4 = A2 * A2; - const MatrixType A6 = A4 * A2; - const MatrixType A8 = A4 * A4; - V = b[17] * A8 + b[15] * A6 + b[13] * A4 + b[11] * A2; // used for temporary storage - MatrixType tmp = A8 * V; - tmp += b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 - + b[1] * MatrixType::Identity(A.rows(), A.cols()); - U.noalias() = A * tmp; - tmp = b[16] * A8 + b[14] * A6 + b[12] * A4 + b[10] * A2; - V.noalias() = tmp * A8; - V += b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 - + b[0] * MatrixType::Identity(A.rows(), A.cols()); -} -#endif - -template <typename MatrixType, typename RealScalar = typename NumTraits<typename traits<MatrixType>::Scalar>::Real> -struct matrix_exp_computeUV -{ - /** \brief Compute Padé approximant to the exponential. - * - * Computes \c U, \c V and \c squarings such that \f$ (V+U)(V-U)^{-1} \f$ is a Padé - * approximant of \f$ \exp(2^{-\mbox{squarings}}M) \f$ around \f$ M = 0 \f$, where \f$ M \f$ - * denotes the matrix \c arg. The degree of the Padé approximant and the value of squarings - * are chosen such that the approximation error is no more than the round-off error. 
- */ - static void run(const MatrixType& arg, MatrixType& U, MatrixType& V, int& squarings); -}; - -template <typename MatrixType> -struct matrix_exp_computeUV<MatrixType, float> -{ - template <typename ArgType> - static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) - { - using std::frexp; - using std::pow; - const float l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); - squarings = 0; - if (l1norm < 4.258730016922831e-001f) { - matrix_exp_pade3(arg, U, V); - } else if (l1norm < 1.880152677804762e+000f) { - matrix_exp_pade5(arg, U, V); - } else { - const float maxnorm = 3.925724783138660f; - frexp(l1norm / maxnorm, &squarings); - if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings)); - matrix_exp_pade7(A, U, V); - } - } -}; - -template <typename MatrixType> -struct matrix_exp_computeUV<MatrixType, double> -{ - typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar; - template <typename ArgType> - static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) - { - using std::frexp; - using std::pow; - const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); - squarings = 0; - if (l1norm < 1.495585217958292e-002) { - matrix_exp_pade3(arg, U, V); - } else if (l1norm < 2.539398330063230e-001) { - matrix_exp_pade5(arg, U, V); - } else if (l1norm < 9.504178996162932e-001) { - matrix_exp_pade7(arg, U, V); - } else if (l1norm < 2.097847961257068e+000) { - matrix_exp_pade9(arg, U, V); - } else { - const RealScalar maxnorm = 5.371920351148152; - frexp(l1norm / maxnorm, &squarings); - if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings)); - matrix_exp_pade13(A, U, V); - } - } -}; - -template <typename MatrixType> -struct matrix_exp_computeUV<MatrixType, long double> -{ - template <typename ArgType> - static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) - { -#if LDBL_MANT_DIG == 53 // double precision - matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings); - -#else - - using std::frexp; - using std::pow; - const long double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff(); - squarings = 0; - -#if LDBL_MANT_DIG <= 64 // extended precision - - if (l1norm < 4.1968497232266989671e-003L) { - matrix_exp_pade3(arg, U, V); - } else if (l1norm < 1.1848116734693823091e-001L) { - matrix_exp_pade5(arg, U, V); - } else if (l1norm < 5.5170388480686700274e-001L) { - matrix_exp_pade7(arg, U, V); - } else if (l1norm < 1.3759868875587845383e+000L) { - matrix_exp_pade9(arg, U, V); - } else { - const long double maxnorm = 4.0246098906697353063L; - frexp(l1norm / maxnorm, &squarings); - if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); - matrix_exp_pade13(A, U, V); - } - -#elif LDBL_MANT_DIG <= 106 // double-double - - if (l1norm < 3.2787892205607026992947488108213e-005L) { - matrix_exp_pade3(arg, U, V); - } else if (l1norm < 6.4467025060072760084130906076332e-003L) { - matrix_exp_pade5(arg, U, V); - } else if (l1norm < 6.8988028496595374751374122881143e-002L) { - matrix_exp_pade7(arg, U, V); - } else if (l1norm < 2.7339737518502231741495857201670e-001L) { - matrix_exp_pade9(arg, U, V); - } else if (l1norm < 1.3203382096514474905666448850278e+000L) { - matrix_exp_pade13(arg, U, V); - } else { - const long double maxnorm = 3.2579440895405400856599663723517L; - frexp(l1norm / maxnorm, &squarings); - if 
(squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); - matrix_exp_pade17(A, U, V); - } - -#elif LDBL_MANT_DIG <= 112 // quadruple precison - - if (l1norm < 1.639394610288918690547467954466970e-005L) { - matrix_exp_pade3(arg, U, V); - } else if (l1norm < 4.253237712165275566025884344433009e-003L) { - matrix_exp_pade5(arg, U, V); - } else if (l1norm < 5.125804063165764409885122032933142e-002L) { - matrix_exp_pade7(arg, U, V); - } else if (l1norm < 2.170000765161155195453205651889853e-001L) { - matrix_exp_pade9(arg, U, V); - } else if (l1norm < 1.125358383453143065081397882891878e+000L) { - matrix_exp_pade13(arg, U, V); - } else { - const long double maxnorm = 2.884233277829519311757165057717815L; - frexp(l1norm / maxnorm, &squarings); - if (squarings < 0) squarings = 0; - MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings)); - matrix_exp_pade17(A, U, V); - } - -#else - - // this case should be handled in compute() - eigen_assert(false && "Bug in MatrixExponential"); - -#endif -#endif // LDBL_MANT_DIG - } -}; - -template<typename T> struct is_exp_known_type : false_type {}; -template<> struct is_exp_known_type<float> : true_type {}; -template<> struct is_exp_known_type<double> : true_type {}; -#if LDBL_MANT_DIG <= 112 -template<> struct is_exp_known_type<long double> : true_type {}; -#endif - -template <typename ArgType, typename ResultType> -void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // natively supported scalar type -{ - typedef typename ArgType::PlainObject MatrixType; - MatrixType U, V; - int squarings; - matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V) - MatrixType numer = U + V; - MatrixType denom = -U + V; - result = denom.partialPivLu().solve(numer); - for (int i=0; i<squarings; i++) - result *= result; // undo scaling by repeated squaring -} - - -/* Computes the matrix exponential - * - * \param arg argument of matrix exponential (should be plain object) - * \param result variable in which result will be stored - */ -template <typename ArgType, typename ResultType> -void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default -{ - typedef typename ArgType::PlainObject MatrixType; - typedef typename traits<MatrixType>::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename std::complex<RealScalar> ComplexScalar; - result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>); -} - -} // end namespace Eigen::internal - -/** \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix exponential of some matrix (expression). - * - * \tparam Derived Type of the argument to the matrix exponential. - * - * This class holds the argument to the matrix exponential until it is assigned or evaluated for - * some other reason (so the argument should not be changed in the meantime). It is the return type - * of MatrixBase::exp() and most of the time this is the only way it is used. - */ -template<typename Derived> struct MatrixExponentialReturnValue -: public ReturnByValue<MatrixExponentialReturnValue<Derived> > -{ - typedef typename Derived::Index Index; - public: - /** \brief Constructor. - * - * \param src %Matrix (expression) forming the argument of the matrix exponential. - */ - MatrixExponentialReturnValue(const Derived& src) : m_src(src) { } - - /** \brief Compute the matrix exponential. 
- * - * \param result the matrix exponential of \p src in the constructor. - */ - template <typename ResultType> - inline void evalTo(ResultType& result) const - { - const typename internal::nested_eval<Derived, 10>::type tmp(m_src); - internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type<typename Derived::Scalar>()); - } - - Index rows() const { return m_src.rows(); } - Index cols() const { return m_src.cols(); } - - protected: - const typename internal::ref_selector<Derived>::type m_src; -}; - -namespace internal { -template<typename Derived> -struct traits<MatrixExponentialReturnValue<Derived> > -{ - typedef typename Derived::PlainObject ReturnType; -}; -} - -template <typename Derived> -const MatrixExponentialReturnValue<Derived> MatrixBase<Derived>::exp() const -{ - eigen_assert(rows() == cols()); - return MatrixExponentialReturnValue<Derived>(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_MATRIX_EXPONENTIAL diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h deleted file mode 100644 index 3df8239..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +++ /dev/null @@ -1,580 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009-2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_FUNCTION_H -#define EIGEN_MATRIX_FUNCTION_H - -#include "StemFunction.h" - - -namespace Eigen { - -namespace internal { - -/** \brief Maximum distance allowed between eigenvalues to be considered "close". */ -static const float matrix_function_separation = 0.1f; - -/** \ingroup MatrixFunctions_Module - * \class MatrixFunctionAtomic - * \brief Helper class for computing matrix functions of atomic matrices. - * - * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other. - */ -template <typename MatrixType> -class MatrixFunctionAtomic -{ - public: - - typedef typename MatrixType::Scalar Scalar; - typedef typename stem_function<Scalar>::type StemFunction; - - /** \brief Constructor - * \param[in] f matrix function to compute. 
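// Typical use of the MatrixBase::exp() entry point defined above (a minimal sketch; the
// user-facing header for this module is <unsupported/Eigen/MatrixFunctions>):
#include <unsupported/Eigen/MatrixFunctions>
#include <iostream>

int main()
{
  Eigen::Matrix2d A;
  A << 0, 1,
       0, 0;                      // nilpotent, so exp(A) = I + A exactly
  std::cout << A.exp() << "\n";   // prints [1 1; 0 1]
  return 0;
}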
- */ - MatrixFunctionAtomic(StemFunction f) : m_f(f) { } - - /** \brief Compute matrix function of atomic matrix - * \param[in] A argument of matrix function, should be upper triangular and atomic - * \returns f(A), the matrix function evaluated at the given matrix - */ - MatrixType compute(const MatrixType& A); - - private: - StemFunction* m_f; -}; - -template <typename MatrixType> -typename NumTraits<typename MatrixType::Scalar>::Real matrix_function_compute_mu(const MatrixType& A) -{ - typedef typename plain_col_type<MatrixType>::type VectorType; - typename MatrixType::Index rows = A.rows(); - const MatrixType N = MatrixType::Identity(rows, rows) - A; - VectorType e = VectorType::Ones(rows); - N.template triangularView<Upper>().solveInPlace(e); - return e.cwiseAbs().maxCoeff(); -} - -template <typename MatrixType> -MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A) -{ - // TODO: Use that A is upper triangular - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef typename MatrixType::Index Index; - Index rows = A.rows(); - Scalar avgEival = A.trace() / Scalar(RealScalar(rows)); - MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows); - RealScalar mu = matrix_function_compute_mu(Ashifted); - MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows); - MatrixType P = Ashifted; - MatrixType Fincr; - for (Index s = 1; s < 1.1 * rows + 10; s++) { // upper limit is fairly arbitrary - Fincr = m_f(avgEival, static_cast<int>(s)) * P; - F += Fincr; - P = Scalar(RealScalar(1.0/(s + 1))) * P * Ashifted; - - // test whether Taylor series converged - const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff(); - const RealScalar Fincr_norm = Fincr.cwiseAbs().rowwise().sum().maxCoeff(); - if (Fincr_norm < NumTraits<Scalar>::epsilon() * F_norm) { - RealScalar delta = 0; - RealScalar rfactorial = 1; - for (Index r = 0; r < rows; r++) { - RealScalar mx = 0; - for (Index i = 0; i < rows; i++) - mx = (std::max)(mx, std::abs(m_f(Ashifted(i, i) + avgEival, static_cast<int>(s+r)))); - if (r != 0) - rfactorial *= RealScalar(r); - delta = (std::max)(delta, mx / rfactorial); - } - const RealScalar P_norm = P.cwiseAbs().rowwise().sum().maxCoeff(); - if (mu * delta * P_norm < NumTraits<Scalar>::epsilon() * F_norm) // series converged - break; - } - } - return F; -} - -/** \brief Find cluster in \p clusters containing some value - * \param[in] key Value to find - * \returns Iterator to cluster containing \p key, or \c clusters.end() if no cluster in \p m_clusters - * contains \p key. - */ -template <typename Index, typename ListOfClusters> -typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOfClusters& clusters) -{ - typename std::list<Index>::iterator j; - for (typename ListOfClusters::iterator i = clusters.begin(); i != clusters.end(); ++i) { - j = std::find(i->begin(), i->end(), key); - if (j != i->end()) - return i; - } - return clusters.end(); -} - -/** \brief Partition eigenvalues in clusters of ei'vals close to each other - * - * \param[in] eivals Eigenvalues - * \param[out] clusters Resulting partition of eigenvalues - * - * The partition satisfies the following two properties: - * # Any eigenvalue in a certain cluster is at most matrix_function_separation() away from another eigenvalue - * in the same cluster. - * # The distance between two eigenvalues in different clusters is more than matrix_function_separation(). - * The implementation follows Algorithm 4.1 in the paper of Davies and Higham. 
- */ -template <typename EivalsType, typename Cluster> -void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list<Cluster>& clusters) -{ - typedef typename EivalsType::Index Index; - typedef typename EivalsType::RealScalar RealScalar; - for (Index i=0; i<eivals.rows(); ++i) { - // Find cluster containing i-th ei'val, adding a new cluster if necessary - typename std::list<Cluster>::iterator qi = matrix_function_find_cluster(i, clusters); - if (qi == clusters.end()) { - Cluster l; - l.push_back(i); - clusters.push_back(l); - qi = clusters.end(); - --qi; - } - - // Look for other element to add to the set - for (Index j=i+1; j<eivals.rows(); ++j) { - if (abs(eivals(j) - eivals(i)) <= RealScalar(matrix_function_separation) - && std::find(qi->begin(), qi->end(), j) == qi->end()) { - typename std::list<Cluster>::iterator qj = matrix_function_find_cluster(j, clusters); - if (qj == clusters.end()) { - qi->push_back(j); - } else { - qi->insert(qi->end(), qj->begin(), qj->end()); - clusters.erase(qj); - } - } - } - } -} - -/** \brief Compute size of each cluster given a partitioning */ -template <typename ListOfClusters, typename Index> -void matrix_function_compute_cluster_size(const ListOfClusters& clusters, Matrix<Index, Dynamic, 1>& clusterSize) -{ - const Index numClusters = static_cast<Index>(clusters.size()); - clusterSize.setZero(numClusters); - Index clusterIndex = 0; - for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) { - clusterSize[clusterIndex] = cluster->size(); - ++clusterIndex; - } -} - -/** \brief Compute start of each block using clusterSize */ -template <typename VectorType> -void matrix_function_compute_block_start(const VectorType& clusterSize, VectorType& blockStart) -{ - blockStart.resize(clusterSize.rows()); - blockStart(0) = 0; - for (typename VectorType::Index i = 1; i < clusterSize.rows(); i++) { - blockStart(i) = blockStart(i-1) + clusterSize(i-1); - } -} - -/** \brief Compute mapping of eigenvalue indices to cluster indices */ -template <typename EivalsType, typename ListOfClusters, typename VectorType> -void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster) -{ - typedef typename EivalsType::Index Index; - eivalToCluster.resize(eivals.rows()); - Index clusterIndex = 0; - for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) { - for (Index i = 0; i < eivals.rows(); ++i) { - if (std::find(cluster->begin(), cluster->end(), i) != cluster->end()) { - eivalToCluster[i] = clusterIndex; - } - } - ++clusterIndex; - } -} - -/** \brief Compute permutation which groups ei'vals in same cluster together */ -template <typename DynVectorType, typename VectorType> -void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation) -{ - typedef typename VectorType::Index Index; - DynVectorType indexNextEntry = blockStart; - permutation.resize(eivalToCluster.rows()); - for (Index i = 0; i < eivalToCluster.rows(); i++) { - Index cluster = eivalToCluster[i]; - permutation[i] = indexNextEntry[cluster]; - ++indexNextEntry[cluster]; - } -} - -/** \brief Permute Schur decomposition in U and T according to permutation */ -template <typename VectorType, typename MatrixType> -void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T) -{ - typedef typename VectorType::Index Index; - for (Index i = 0; i 
< permutation.rows() - 1; i++) { - Index j; - for (j = i; j < permutation.rows(); j++) { - if (permutation(j) == i) break; - } - eigen_assert(permutation(j) == i); - for (Index k = j-1; k >= i; k--) { - JacobiRotation<typename MatrixType::Scalar> rotation; - rotation.makeGivens(T(k, k+1), T(k+1, k+1) - T(k, k)); - T.applyOnTheLeft(k, k+1, rotation.adjoint()); - T.applyOnTheRight(k, k+1, rotation); - U.applyOnTheRight(k, k+1, rotation); - std::swap(permutation.coeffRef(k), permutation.coeffRef(k+1)); - } - } -} - -/** \brief Compute block diagonal part of matrix function. - * - * This routine computes the matrix function applied to the block diagonal part of \p T (which should be - * upper triangular), with the blocking given by \p blockStart and \p clusterSize. The matrix function of - * each diagonal block is computed by \p atomic. The off-diagonal parts of \p fT are set to zero. - */ -template <typename MatrixType, typename AtomicType, typename VectorType> -void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT) -{ - fT.setZero(T.rows(), T.cols()); - for (typename VectorType::Index i = 0; i < clusterSize.rows(); ++i) { - fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)) - = atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))); - } -} - -/** \brief Solve a triangular Sylvester equation AX + XB = C - * - * \param[in] A the matrix A; should be square and upper triangular - * \param[in] B the matrix B; should be square and upper triangular - * \param[in] C the matrix C; should have correct size. - * - * \returns the solution X. - * - * If A is m-by-m and B is n-by-n, then both C and X are m-by-n. The (i,j)-th component of the Sylvester - * equation is - * \f[ - * \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. - * \f] - * This can be re-arranged to yield: - * \f[ - * X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij} - * - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr). - * \f] - * It is assumed that A and B are such that the numerator is never zero (otherwise the Sylvester equation - * does not have a unique solution). In that case, these equations can be evaluated in the order - * \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$. - */ -template <typename MatrixType> -MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C) -{ - eigen_assert(A.rows() == A.cols()); - eigen_assert(A.isUpperTriangular()); - eigen_assert(B.rows() == B.cols()); - eigen_assert(B.isUpperTriangular()); - eigen_assert(C.rows() == A.rows()); - eigen_assert(C.cols() == B.rows()); - - typedef typename MatrixType::Index Index; - typedef typename MatrixType::Scalar Scalar; - - Index m = A.rows(); - Index n = B.rows(); - MatrixType X(m, n); - - for (Index i = m - 1; i >= 0; --i) { - for (Index j = 0; j < n; ++j) { - - // Compute AX = \sum_{k=i+1}^m A_{ik} X_{kj} - Scalar AX; - if (i == m - 1) { - AX = 0; - } else { - Matrix<Scalar,1,1> AXmatrix = A.row(i).tail(m-1-i) * X.col(j).tail(m-1-i); - AX = AXmatrix(0,0); - } - - // Compute XB = \sum_{k=1}^{j-1} X_{ik} B_{kj} - Scalar XB; - if (j == 0) { - XB = 0; - } else { - Matrix<Scalar,1,1> XBmatrix = X.row(i).head(j) * B.col(j).head(j); - XB = XBmatrix(0,0); - } - - X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j)); - } - } - return X; -} - -/** \brief Compute part of matrix function above block diagonal. 
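// Standalone illustration of the back-substitution used in
// matrix_function_solve_triangular_sylvester() above (editorial sketch with hypothetical
// 2x2 data): X(i,j) depends only on entries below row i and left of column j, so the sweep
// runs i = m-1..0, j = 0..n-1, and recovers X from C = A*X + X*B.
#include <Eigen/Dense>
#include <iostream>

Eigen::MatrixXd solve_triangular_sylvester(const Eigen::MatrixXd& A,
                                           const Eigen::MatrixXd& B,
                                           const Eigen::MatrixXd& C)
{
  const int m = int(A.rows()), n = int(B.rows());
  Eigen::MatrixXd X(m, n);
  for (int i = m - 1; i >= 0; --i)
    for (int j = 0; j < n; ++j)
    {
      double AX = 0.0, XB = 0.0;
      if (i < m - 1) {                          // sum_{k>i} A(i,k) X(k,j)
        Eigen::Matrix<double,1,1> tmp = A.row(i).tail(m - 1 - i) * X.col(j).tail(m - 1 - i);
        AX = tmp(0,0);
      }
      if (j > 0) {                              // sum_{k<j} X(i,k) B(k,j)
        Eigen::Matrix<double,1,1> tmp = X.row(i).head(j) * B.col(j).head(j);
        XB = tmp(0,0);
      }
      X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j));
    }
  return X;
}

int main()
{
  Eigen::MatrixXd A(2,2), B(2,2), X0(2,2);
  A << 1, 2,
       0, 3;
  B << 4, 5,
       0, 6;                                    // both upper triangular, A(i,i)+B(j,j) != 0
  X0 << 1, 0,
        2, 1;                                   // arbitrary "known" solution
  Eigen::MatrixXd C = A * X0 + X0 * B;
  std::cout << (solve_triangular_sylvester(A, B, C) - X0).norm() << "\n";  // ~0
  return 0;
}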
- * - * This routine completes the computation of \p fT, denoting a matrix function applied to the triangular - * matrix \p T. It assumes that the block diagonal part of \p fT has already been computed. The part below - * the diagonal is zero, because \p T is upper triangular. - */ -template <typename MatrixType, typename VectorType> -void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT) -{ - typedef internal::traits<MatrixType> Traits; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - static const int RowsAtCompileTime = Traits::RowsAtCompileTime; - static const int ColsAtCompileTime = Traits::ColsAtCompileTime; - static const int Options = MatrixType::Options; - typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType; - - for (Index k = 1; k < clusterSize.rows(); k++) { - for (Index i = 0; i < clusterSize.rows() - k; i++) { - // compute (i, i+k) block - DynMatrixType A = T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)); - DynMatrixType B = -T.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k)); - DynMatrixType C = fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)) - * T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)); - C -= T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)) - * fT.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k)); - for (Index m = i + 1; m < i + k; m++) { - C += fT.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m)) - * T.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k)); - C -= T.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m)) - * fT.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k)); - } - fT.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k)) - = matrix_function_solve_triangular_sylvester(A, B, C); - } - } -} - -/** \ingroup MatrixFunctions_Module - * \brief Class for computing matrix functions. - * \tparam MatrixType type of the argument of the matrix function, - * expected to be an instantiation of the Matrix class template. - * \tparam AtomicType type for computing matrix function of atomic blocks. - * \tparam IsComplex used internally to select correct specialization. - * - * This class implements the Schur-Parlett algorithm for computing matrix functions. The spectrum of the - * matrix is divided in clustered of eigenvalues that lies close together. This class delegates the - * computation of the matrix function on every block corresponding to these clusters to an object of type - * \p AtomicType and uses these results to compute the matrix function of the whole matrix. The class - * \p AtomicType should have a \p compute() member function for computing the matrix function of a block. - * - * \sa class MatrixFunctionAtomic, class MatrixLogarithmAtomic - */ -template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex> -struct matrix_function_compute -{ - /** \brief Compute the matrix function. - * - * \param[in] A argument of matrix function, should be a square matrix. - * \param[in] atomic class for computing matrix function of atomic blocks. - * \param[out] result the function \p f applied to \p A, as - * specified in the constructor. 
- * - * See MatrixBase::matrixFunction() for details on how this computation - * is implemented. - */ - template <typename AtomicType, typename ResultType> - static void run(const MatrixType& A, AtomicType& atomic, ResultType &result); -}; - -/** \internal \ingroup MatrixFunctions_Module - * \brief Partial specialization of MatrixFunction for real matrices - * - * This converts the real matrix to a complex matrix, compute the matrix function of that matrix, and then - * converts the result back to a real matrix. - */ -template <typename MatrixType> -struct matrix_function_compute<MatrixType, 0> -{ - template <typename MatA, typename AtomicType, typename ResultType> - static void run(const MatA& A, AtomicType& atomic, ResultType &result) - { - typedef internal::traits<MatrixType> Traits; - typedef typename Traits::Scalar Scalar; - static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime; - static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime; - - typedef std::complex<Scalar> ComplexScalar; - typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix; - - ComplexMatrix CA = A.template cast<ComplexScalar>(); - ComplexMatrix Cresult; - matrix_function_compute<ComplexMatrix>::run(CA, atomic, Cresult); - result = Cresult.real(); - } -}; - -/** \internal \ingroup MatrixFunctions_Module - * \brief Partial specialization of MatrixFunction for complex matrices - */ -template <typename MatrixType> -struct matrix_function_compute<MatrixType, 1> -{ - template <typename MatA, typename AtomicType, typename ResultType> - static void run(const MatA& A, AtomicType& atomic, ResultType &result) - { - typedef internal::traits<MatrixType> Traits; - - // compute Schur decomposition of A - const ComplexSchur<MatrixType> schurOfA(A); - MatrixType T = schurOfA.matrixT(); - MatrixType U = schurOfA.matrixU(); - - // partition eigenvalues into clusters of ei'vals "close" to each other - std::list<std::list<Index> > clusters; - matrix_function_partition_eigenvalues(T.diagonal(), clusters); - - // compute size of each cluster - Matrix<Index, Dynamic, 1> clusterSize; - matrix_function_compute_cluster_size(clusters, clusterSize); - - // blockStart[i] is row index at which block corresponding to i-th cluster starts - Matrix<Index, Dynamic, 1> blockStart; - matrix_function_compute_block_start(clusterSize, blockStart); - - // compute map so that eivalToCluster[i] = j means that i-th ei'val is in j-th cluster - Matrix<Index, Dynamic, 1> eivalToCluster; - matrix_function_compute_map(T.diagonal(), clusters, eivalToCluster); - - // compute permutation which groups ei'vals in same cluster together - Matrix<Index, Traits::RowsAtCompileTime, 1> permutation; - matrix_function_compute_permutation(blockStart, eivalToCluster, permutation); - - // permute Schur decomposition - matrix_function_permute_schur(permutation, U, T); - - // compute result - MatrixType fT; // matrix function applied to T - matrix_function_compute_block_atomic(T, atomic, blockStart, clusterSize, fT); - matrix_function_compute_above_diagonal(T, blockStart, clusterSize, fT); - result = U * (fT.template triangularView<Upper>() * U.adjoint()); - } -}; - -} // end of namespace internal - -/** \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix function of some matrix (expression). - * - * \tparam Derived Type of the argument to the matrix function. 
- * - * This class holds the argument to the matrix function until it is assigned or evaluated for some other - * reason (so the argument should not be changed in the meantime). It is the return type of - * matrixBase::matrixFunction() and related functions and most of the time this is the only way it is used. - */ -template<typename Derived> class MatrixFunctionReturnValue -: public ReturnByValue<MatrixFunctionReturnValue<Derived> > -{ - public: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - typedef typename internal::stem_function<Scalar>::type StemFunction; - - protected: - typedef typename internal::ref_selector<Derived>::type DerivedNested; - - public: - - /** \brief Constructor. - * - * \param[in] A %Matrix (expression) forming the argument of the matrix function. - * \param[in] f Stem function for matrix function under consideration. - */ - MatrixFunctionReturnValue(const Derived& A, StemFunction f) : m_A(A), m_f(f) { } - - /** \brief Compute the matrix function. - * - * \param[out] result \p f applied to \p A, where \p f and \p A are as in the constructor. - */ - template <typename ResultType> - inline void evalTo(ResultType& result) const - { - typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType; - typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean; - typedef internal::traits<NestedEvalTypeClean> Traits; - static const int RowsAtCompileTime = Traits::RowsAtCompileTime; - static const int ColsAtCompileTime = Traits::ColsAtCompileTime; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; - typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType; - - typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType; - AtomicType atomic(m_f); - - internal::matrix_function_compute<typename NestedEvalTypeClean::PlainObject>::run(m_A, atomic, result); - } - - Index rows() const { return m_A.rows(); } - Index cols() const { return m_A.cols(); } - - private: - const DerivedNested m_A; - StemFunction *m_f; -}; - -namespace internal { -template<typename Derived> -struct traits<MatrixFunctionReturnValue<Derived> > -{ - typedef typename Derived::PlainObject ReturnType; -}; -} - - -/********** MatrixBase methods **********/ - - -template <typename Derived> -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::matrixFunction(typename internal::stem_function<typename internal::traits<Derived>::Scalar>::type f) const -{ - eigen_assert(rows() == cols()); - return MatrixFunctionReturnValue<Derived>(derived(), f); -} - -template <typename Derived> -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sin() const -{ - eigen_assert(rows() == cols()); - typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar; - return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_sin<ComplexScalar>); -} - -template <typename Derived> -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cos() const -{ - eigen_assert(rows() == cols()); - typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar; - return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cos<ComplexScalar>); -} - -template <typename Derived> -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sinh() const -{ - eigen_assert(rows() == cols()); - typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar; - return MatrixFunctionReturnValue<Derived>(derived(), 
internal::stem_function_sinh<ComplexScalar>); -} - -template <typename Derived> -const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cosh() const -{ - eigen_assert(rows() == cols()); - typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar; - return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cosh<ComplexScalar>); -} - -} // end namespace Eigen - -#endif // EIGEN_MATRIX_FUNCTION_H diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h deleted file mode 100644 index cf5fffa..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +++ /dev/null @@ -1,373 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// Copyright (C) 2011 Chen-Pang He <jdh8@ms63.hinet.net> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_LOGARITHM -#define EIGEN_MATRIX_LOGARITHM - -namespace Eigen { - -namespace internal { - -template <typename Scalar> -struct matrix_log_min_pade_degree -{ - static const int value = 3; -}; - -template <typename Scalar> -struct matrix_log_max_pade_degree -{ - typedef typename NumTraits<Scalar>::Real RealScalar; - static const int value = std::numeric_limits<RealScalar>::digits<= 24? 5: // single precision - std::numeric_limits<RealScalar>::digits<= 53? 7: // double precision - std::numeric_limits<RealScalar>::digits<= 64? 8: // extended precision - std::numeric_limits<RealScalar>::digits<=106? 10: // double-double - 11; // quadruple precision -}; - -/** \brief Compute logarithm of 2x2 triangular matrix. */ -template <typename MatrixType> -void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - using std::abs; - using std::ceil; - using std::imag; - using std::log; - - Scalar logA00 = log(A(0,0)); - Scalar logA11 = log(A(1,1)); - - result(0,0) = logA00; - result(1,0) = Scalar(0); - result(1,1) = logA11; - - Scalar y = A(1,1) - A(0,0); - if (y==Scalar(0)) - { - result(0,1) = A(0,1) / A(0,0); - } - else if ((abs(A(0,0)) < RealScalar(0.5)*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1)))) - { - result(0,1) = A(0,1) * (logA11 - logA00) / y; - } - else - { - // computation in previous branch is inaccurate if A(1,1) \approx A(0,0) - int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI))); - result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,2*EIGEN_PI*unwindingNumber)) / y; - } -} - -/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = float) */ -inline int matrix_log_get_pade_degree(float normTminusI) -{ - const float maxNormForPade[] = { 2.5111573934555054e-1 /* degree = 3 */ , 4.0535837411880493e-1, - 5.3149729967117310e-1 }; - const int minPadeDegree = matrix_log_min_pade_degree<float>::value; - const int maxPadeDegree = matrix_log_max_pade_degree<float>::value; - int degree = minPadeDegree; - for (; degree <= maxPadeDegree; ++degree) - if (normTminusI <= maxNormForPade[degree - minPadeDegree]) - break; - return degree; -} - -/* \brief Get suitable degree for Pade approximation. 
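// Minimal usage sketch of the generic entry points defined above (matrixFunction(), sin(),
// cos(), ...). The stem function receives a complex scalar and a derivative order n and must
// return the n-th derivative of f at that point; for exp every derivative is exp itself.
#include <unsupported/Eigen/MatrixFunctions>
#include <complex>
#include <iostream>

std::complex<double> expfn(std::complex<double> x, int /*derivative order*/)
{
  return std::exp(x);
}

int main()
{
  Eigen::Matrix3d A = 0.1 * Eigen::Matrix3d::Random();
  std::cout << A.matrixFunction(expfn) << "\n\n";  // Schur-Parlett evaluation of exp(A)
  std::cout << A.exp() << "\n\n";                  // Pade-based exp(A); should closely agree
  std::cout << A.sin() << "\n";                    // same machinery with the sine stem function
  return 0;
}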
(specialized for RealScalar = double) */ -inline int matrix_log_get_pade_degree(double normTminusI) -{ - const double maxNormForPade[] = { 1.6206284795015624e-2 /* degree = 3 */ , 5.3873532631381171e-2, - 1.1352802267628681e-1, 1.8662860613541288e-1, 2.642960831111435e-1 }; - const int minPadeDegree = matrix_log_min_pade_degree<double>::value; - const int maxPadeDegree = matrix_log_max_pade_degree<double>::value; - int degree = minPadeDegree; - for (; degree <= maxPadeDegree; ++degree) - if (normTminusI <= maxNormForPade[degree - minPadeDegree]) - break; - return degree; -} - -/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = long double) */ -inline int matrix_log_get_pade_degree(long double normTminusI) -{ -#if LDBL_MANT_DIG == 53 // double precision - const long double maxNormForPade[] = { 1.6206284795015624e-2L /* degree = 3 */ , 5.3873532631381171e-2L, - 1.1352802267628681e-1L, 1.8662860613541288e-1L, 2.642960831111435e-1L }; -#elif LDBL_MANT_DIG <= 64 // extended precision - const long double maxNormForPade[] = { 5.48256690357782863103e-3L /* degree = 3 */, 2.34559162387971167321e-2L, - 5.84603923897347449857e-2L, 1.08486423756725170223e-1L, 1.68385767881294446649e-1L, - 2.32777776523703892094e-1L }; -#elif LDBL_MANT_DIG <= 106 // double-double - const long double maxNormForPade[] = { 8.58970550342939562202529664318890e-5L /* degree = 3 */, - 9.34074328446359654039446552677759e-4L, 4.26117194647672175773064114582860e-3L, - 1.21546224740281848743149666560464e-2L, 2.61100544998339436713088248557444e-2L, - 4.66170074627052749243018566390567e-2L, 7.32585144444135027565872014932387e-2L, - 1.05026503471351080481093652651105e-1L }; -#else // quadruple precision - const long double maxNormForPade[] = { 4.7419931187193005048501568167858103e-5L /* degree = 3 */, - 5.8853168473544560470387769480192666e-4L, 2.9216120366601315391789493628113520e-3L, - 8.8415758124319434347116734705174308e-3L, 1.9850836029449446668518049562565291e-2L, - 3.6688019729653446926585242192447447e-2L, 5.9290962294020186998954055264528393e-2L, - 8.6998436081634343903250580992127677e-2L, 1.1880960220216759245467951592883642e-1L }; -#endif - const int minPadeDegree = matrix_log_min_pade_degree<long double>::value; - const int maxPadeDegree = matrix_log_max_pade_degree<long double>::value; - int degree = minPadeDegree; - for (; degree <= maxPadeDegree; ++degree) - if (normTminusI <= maxNormForPade[degree - minPadeDegree]) - break; - return degree; -} - -/* \brief Compute Pade approximation to matrix logarithm */ -template <typename MatrixType> -void matrix_log_compute_pade(MatrixType& result, const MatrixType& T, int degree) -{ - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - const int minPadeDegree = 3; - const int maxPadeDegree = 11; - assert(degree >= minPadeDegree && degree <= maxPadeDegree); - - const RealScalar nodes[][maxPadeDegree] = { - { 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L, // degree 3 - 0.8872983346207416885179265399782400L }, - { 0.0694318442029737123880267555535953L, 0.3300094782075718675986671204483777L, // degree 4 - 0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L }, - { 0.0469100770306680036011865608503035L, 0.2307653449471584544818427896498956L, // degree 5 - 0.5000000000000000000000000000000000L, 0.7692346550528415455181572103501044L, - 0.9530899229693319963988134391496965L }, - { 0.0337652428984239860938492227530027L, 0.1693953067668677431693002024900473L, // degree 6 - 
0.3806904069584015456847491391596440L, 0.6193095930415984543152508608403560L, - 0.8306046932331322568306997975099527L, 0.9662347571015760139061507772469973L }, - { 0.0254460438286207377369051579760744L, 0.1292344072003027800680676133596058L, // degree 7 - 0.2970774243113014165466967939615193L, 0.5000000000000000000000000000000000L, - 0.7029225756886985834533032060384807L, 0.8707655927996972199319323866403942L, - 0.9745539561713792622630948420239256L }, - { 0.0198550717512318841582195657152635L, 0.1016667612931866302042230317620848L, // degree 8 - 0.2372337950418355070911304754053768L, 0.4082826787521750975302619288199080L, - 0.5917173212478249024697380711800920L, 0.7627662049581644929088695245946232L, - 0.8983332387068133697957769682379152L, 0.9801449282487681158417804342847365L }, - { 0.0159198802461869550822118985481636L, 0.0819844463366821028502851059651326L, // degree 9 - 0.1933142836497048013456489803292629L, 0.3378732882980955354807309926783317L, - 0.5000000000000000000000000000000000L, 0.6621267117019044645192690073216683L, - 0.8066857163502951986543510196707371L, 0.9180155536633178971497148940348674L, - 0.9840801197538130449177881014518364L }, - { 0.0130467357414141399610179939577740L, 0.0674683166555077446339516557882535L, // degree 10 - 0.1602952158504877968828363174425632L, 0.2833023029353764046003670284171079L, - 0.4255628305091843945575869994351400L, 0.5744371694908156054424130005648600L, - 0.7166976970646235953996329715828921L, 0.8397047841495122031171636825574368L, - 0.9325316833444922553660483442117465L, 0.9869532642585858600389820060422260L }, - { 0.0108856709269715035980309994385713L, 0.0564687001159523504624211153480364L, // degree 11 - 0.1349239972129753379532918739844233L, 0.2404519353965940920371371652706952L, - 0.3652284220238275138342340072995692L, 0.5000000000000000000000000000000000L, - 0.6347715779761724861657659927004308L, 0.7595480646034059079628628347293048L, - 0.8650760027870246620467081260155767L, 0.9435312998840476495375788846519636L, - 0.9891143290730284964019690005614287L } }; - - const RealScalar weights[][maxPadeDegree] = { - { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L, // degree 3 - 0.2777777777777777777777777777777778L }, - { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L, // degree 4 - 0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L }, - { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L, // degree 5 - 0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L, - 0.1184634425280945437571320203599587L }, - { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L, // degree 6 - 0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L, - 0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L }, - { 0.0647424830844348466353057163395410L, 0.1398526957446383339507338857118898L, // degree 7 - 0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L, - 0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L, - 0.0647424830844348466353057163395410L }, - { 0.0506142681451881295762656771549811L, 0.1111905172266872352721779972131204L, // degree 8 - 0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L, - 0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L, - 0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L }, - { 
0.0406371941807872059859460790552618L, 0.0903240803474287020292360156214564L, // degree 9 - 0.1303053482014677311593714347093164L, 0.1561735385200014200343152032922218L, - 0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L, - 0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L, - 0.0406371941807872059859460790552618L }, - { 0.0333356721543440687967844049466659L, 0.0747256745752902965728881698288487L, // degree 10 - 0.1095431812579910219977674671140816L, 0.1346333596549981775456134607847347L, - 0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L, - 0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L, - 0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L }, - { 0.0278342835580868332413768602212743L, 0.0627901847324523123173471496119701L, // degree 11 - 0.0931451054638671257130488207158280L, 0.1165968822959952399592618524215876L, - 0.1314022722551233310903444349452546L, 0.1364625433889503153572417641681711L, - 0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L, - 0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L, - 0.0278342835580868332413768602212743L } }; - - MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows()); - result.setZero(T.rows(), T.rows()); - for (int k = 0; k < degree; ++k) { - RealScalar weight = weights[degree-minPadeDegree][k]; - RealScalar node = nodes[degree-minPadeDegree][k]; - result += weight * (MatrixType::Identity(T.rows(), T.rows()) + node * TminusI) - .template triangularView<Upper>().solve(TminusI); - } -} - -/** \brief Compute logarithm of triangular matrices with size > 2. - * \details This uses a inverse scale-and-square algorithm. */ -template <typename MatrixType> -void matrix_log_compute_big(const MatrixType& A, MatrixType& result) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - using std::pow; - - int numberOfSquareRoots = 0; - int numberOfExtraSquareRoots = 0; - int degree; - MatrixType T = A, sqrtT; - - int maxPadeDegree = matrix_log_max_pade_degree<Scalar>::value; - const RealScalar maxNormForPade = maxPadeDegree<= 5? 5.3149729967117310e-1L: // single precision - maxPadeDegree<= 7? 2.6429608311114350e-1L: // double precision - maxPadeDegree<= 8? 2.32777776523703892094e-1L: // extended precision - maxPadeDegree<=10? 1.05026503471351080481093652651105e-1L: // double-double - 1.1880960220216759245467951592883642e-1L; // quadruple precision - - while (true) { - RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff(); - if (normTminusI < maxNormForPade) { - degree = matrix_log_get_pade_degree(normTminusI); - int degree2 = matrix_log_get_pade_degree(normTminusI / RealScalar(2)); - if ((degree - degree2 <= 1) || (numberOfExtraSquareRoots == 1)) - break; - ++numberOfExtraSquareRoots; - } - matrix_sqrt_triangular(T, sqrtT); - T = sqrtT.template triangularView<Upper>(); - ++numberOfSquareRoots; - } - - matrix_log_compute_pade(result, T, degree); - result *= pow(RealScalar(2), numberOfSquareRoots); -} - -/** \ingroup MatrixFunctions_Module - * \class MatrixLogarithmAtomic - * \brief Helper class for computing matrix logarithm of atomic matrices. - * - * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other. 
- * - * \sa class MatrixFunctionAtomic, MatrixBase::log() - */ -template <typename MatrixType> -class MatrixLogarithmAtomic -{ -public: - /** \brief Compute matrix logarithm of atomic matrix - * \param[in] A argument of matrix logarithm, should be upper triangular and atomic - * \returns The logarithm of \p A. - */ - MatrixType compute(const MatrixType& A); -}; - -template <typename MatrixType> -MatrixType MatrixLogarithmAtomic<MatrixType>::compute(const MatrixType& A) -{ - using std::log; - MatrixType result(A.rows(), A.rows()); - if (A.rows() == 1) - result(0,0) = log(A(0,0)); - else if (A.rows() == 2) - matrix_log_compute_2x2(A, result); - else - matrix_log_compute_big(A, result); - return result; -} - -} // end of namespace internal - -/** \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix logarithm of some matrix (expression). - * - * \tparam Derived Type of the argument to the matrix function. - * - * This class holds the argument to the matrix function until it is - * assigned or evaluated for some other reason (so the argument - * should not be changed in the meantime). It is the return type of - * MatrixBase::log() and most of the time this is the only way it - * is used. - */ -template<typename Derived> class MatrixLogarithmReturnValue -: public ReturnByValue<MatrixLogarithmReturnValue<Derived> > -{ -public: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - -protected: - typedef typename internal::ref_selector<Derived>::type DerivedNested; - -public: - - /** \brief Constructor. - * - * \param[in] A %Matrix (expression) forming the argument of the matrix logarithm. - */ - explicit MatrixLogarithmReturnValue(const Derived& A) : m_A(A) { } - - /** \brief Compute the matrix logarithm. - * - * \param[out] result Logarithm of \c A, where \c A is as specified in the constructor. 
- */ - template <typename ResultType> - inline void evalTo(ResultType& result) const - { - typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType; - typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean; - typedef internal::traits<DerivedEvalTypeClean> Traits; - static const int RowsAtCompileTime = Traits::RowsAtCompileTime; - static const int ColsAtCompileTime = Traits::ColsAtCompileTime; - typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; - typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType; - typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType; - AtomicType atomic; - - internal::matrix_function_compute<typename DerivedEvalTypeClean::PlainObject>::run(m_A, atomic, result); - } - - Index rows() const { return m_A.rows(); } - Index cols() const { return m_A.cols(); } - -private: - const DerivedNested m_A; -}; - -namespace internal { - template<typename Derived> - struct traits<MatrixLogarithmReturnValue<Derived> > - { - typedef typename Derived::PlainObject ReturnType; - }; -} - - -/********** MatrixBase method **********/ - - -template <typename Derived> -const MatrixLogarithmReturnValue<Derived> MatrixBase<Derived>::log() const -{ - eigen_assert(rows() == cols()); - return MatrixLogarithmReturnValue<Derived>(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_MATRIX_LOGARITHM diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h deleted file mode 100644 index a3273da..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h +++ /dev/null @@ -1,709 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012, 2013 Chen-Pang He <jdh8@ms63.hinet.net> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_POWER -#define EIGEN_MATRIX_POWER - -namespace Eigen { - -template<typename MatrixType> class MatrixPower; - -/** - * \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix power of some matrix. - * - * \tparam MatrixType type of the base, a matrix. - * - * This class holds the arguments to the matrix power until it is - * assigned or evaluated for some other reason (so the argument - * should not be changed in the meantime). It is the return type of - * MatrixPower::operator() and related functions and most of the - * time this is the only way it is used. - */ -/* TODO This class is only used by MatrixPower, so it should be nested - * into MatrixPower, like MatrixPower::ReturnValue. However, my - * compiler complained about unused template parameter in the - * following declaration in namespace internal. - * - * template<typename MatrixType> - * struct traits<MatrixPower<MatrixType>::ReturnValue>; - */ -template<typename MatrixType> -class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParenthesesReturnValue<MatrixType> > -{ - public: - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - - /** - * \brief Constructor. - * - * \param[in] pow %MatrixPower storing the base. - * \param[in] p scalar, the exponent of the matrix power. 
- */ - MatrixPowerParenthesesReturnValue(MatrixPower<MatrixType>& pow, RealScalar p) : m_pow(pow), m_p(p) - { } - - /** - * \brief Compute the matrix power. - * - * \param[out] result - */ - template<typename ResultType> - inline void evalTo(ResultType& result) const - { m_pow.compute(result, m_p); } - - Index rows() const { return m_pow.rows(); } - Index cols() const { return m_pow.cols(); } - - private: - MatrixPower<MatrixType>& m_pow; - const RealScalar m_p; -}; - -/** - * \ingroup MatrixFunctions_Module - * - * \brief Class for computing matrix powers. - * - * \tparam MatrixType type of the base, expected to be an instantiation - * of the Matrix class template. - * - * This class is capable of computing triangular real/complex matrices - * raised to a power in the interval \f$ (-1, 1) \f$. - * - * \note Currently this class is only used by MatrixPower. One may - * insist that this be nested into MatrixPower. This class is here to - * faciliate future development of triangular matrix functions. - */ -template<typename MatrixType> -class MatrixPowerAtomic : internal::noncopyable -{ - private: - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime - }; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef std::complex<RealScalar> ComplexScalar; - typedef typename MatrixType::Index Index; - typedef Block<MatrixType,Dynamic,Dynamic> ResultType; - - const MatrixType& m_A; - RealScalar m_p; - - void computePade(int degree, const MatrixType& IminusT, ResultType& res) const; - void compute2x2(ResultType& res, RealScalar p) const; - void computeBig(ResultType& res) const; - static int getPadeDegree(float normIminusT); - static int getPadeDegree(double normIminusT); - static int getPadeDegree(long double normIminusT); - static ComplexScalar computeSuperDiag(const ComplexScalar&, const ComplexScalar&, RealScalar p); - static RealScalar computeSuperDiag(RealScalar, RealScalar, RealScalar p); - - public: - /** - * \brief Constructor. - * - * \param[in] T the base of the matrix power. - * \param[in] p the exponent of the matrix power, should be in - * \f$ (-1, 1) \f$. - * - * The class stores a reference to T, so it should not be changed - * (or destroyed) before evaluation. Only the upper triangular - * part of T is read. - */ - MatrixPowerAtomic(const MatrixType& T, RealScalar p); - - /** - * \brief Compute the matrix power. - * - * \param[out] res \f$ A^p \f$ where A and p are specified in the - * constructor. - */ - void compute(ResultType& res) const; -}; - -template<typename MatrixType> -MatrixPowerAtomic<MatrixType>::MatrixPowerAtomic(const MatrixType& T, RealScalar p) : - m_A(T), m_p(p) -{ - eigen_assert(T.rows() == T.cols()); - eigen_assert(p > -1 && p < 1); -} - -template<typename MatrixType> -void MatrixPowerAtomic<MatrixType>::compute(ResultType& res) const -{ - using std::pow; - switch (m_A.rows()) { - case 0: - break; - case 1: - res(0,0) = pow(m_A(0,0), m_p); - break; - case 2: - compute2x2(res, m_p); - break; - default: - computeBig(res); - } -} - -template<typename MatrixType> -void MatrixPowerAtomic<MatrixType>::computePade(int degree, const MatrixType& IminusT, ResultType& res) const -{ - int i = 2*degree; - res = (m_p-degree) / (2*i-2) * IminusT; - - for (--i; i; --i) { - res = (MatrixType::Identity(IminusT.rows(), IminusT.cols()) + res).template triangularView<Upper>() - .solve((i==1 ? -m_p : i&1 ? 
(-m_p-i/2)/(2*i) : (m_p-i/2)/(2*i-2)) * IminusT).eval(); - } - res += MatrixType::Identity(IminusT.rows(), IminusT.cols()); -} - -// This function assumes that res has the correct size (see bug 614) -template<typename MatrixType> -void MatrixPowerAtomic<MatrixType>::compute2x2(ResultType& res, RealScalar p) const -{ - using std::abs; - using std::pow; - res.coeffRef(0,0) = pow(m_A.coeff(0,0), p); - - for (Index i=1; i < m_A.cols(); ++i) { - res.coeffRef(i,i) = pow(m_A.coeff(i,i), p); - if (m_A.coeff(i-1,i-1) == m_A.coeff(i,i)) - res.coeffRef(i-1,i) = p * pow(m_A.coeff(i,i), p-1); - else if (2*abs(m_A.coeff(i-1,i-1)) < abs(m_A.coeff(i,i)) || 2*abs(m_A.coeff(i,i)) < abs(m_A.coeff(i-1,i-1))) - res.coeffRef(i-1,i) = (res.coeff(i,i)-res.coeff(i-1,i-1)) / (m_A.coeff(i,i)-m_A.coeff(i-1,i-1)); - else - res.coeffRef(i-1,i) = computeSuperDiag(m_A.coeff(i,i), m_A.coeff(i-1,i-1), p); - res.coeffRef(i-1,i) *= m_A.coeff(i-1,i); - } -} - -template<typename MatrixType> -void MatrixPowerAtomic<MatrixType>::computeBig(ResultType& res) const -{ - using std::ldexp; - const int digits = std::numeric_limits<RealScalar>::digits; - const RealScalar maxNormForPade = digits <= 24? 4.3386528e-1L // single precision - : digits <= 53? 2.789358995219730e-1L // double precision - : digits <= 64? 2.4471944416607995472e-1L // extended precision - : digits <= 106? 1.1016843812851143391275867258512e-1L // double-double - : 9.134603732914548552537150753385375e-2L; // quadruple precision - MatrixType IminusT, sqrtT, T = m_A.template triangularView<Upper>(); - RealScalar normIminusT; - int degree, degree2, numberOfSquareRoots = 0; - bool hasExtraSquareRoot = false; - - for (Index i=0; i < m_A.cols(); ++i) - eigen_assert(m_A(i,i) != RealScalar(0)); - - while (true) { - IminusT = MatrixType::Identity(m_A.rows(), m_A.cols()) - T; - normIminusT = IminusT.cwiseAbs().colwise().sum().maxCoeff(); - if (normIminusT < maxNormForPade) { - degree = getPadeDegree(normIminusT); - degree2 = getPadeDegree(normIminusT/2); - if (degree - degree2 <= 1 || hasExtraSquareRoot) - break; - hasExtraSquareRoot = true; - } - matrix_sqrt_triangular(T, sqrtT); - T = sqrtT.template triangularView<Upper>(); - ++numberOfSquareRoots; - } - computePade(degree, IminusT, res); - - for (; numberOfSquareRoots; --numberOfSquareRoots) { - compute2x2(res, ldexp(m_p, -numberOfSquareRoots)); - res = res.template triangularView<Upper>() * res; - } - compute2x2(res, m_p); -} - -template<typename MatrixType> -inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(float normIminusT) -{ - const float maxNormForPade[] = { 2.8064004e-1f /* degree = 3 */ , 4.3386528e-1f }; - int degree = 3; - for (; degree <= 4; ++degree) - if (normIminusT <= maxNormForPade[degree - 3]) - break; - return degree; -} - -template<typename MatrixType> -inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(double normIminusT) -{ - const double maxNormForPade[] = { 1.884160592658218e-2 /* degree = 3 */ , 6.038881904059573e-2, 1.239917516308172e-1, - 1.999045567181744e-1, 2.789358995219730e-1 }; - int degree = 3; - for (; degree <= 7; ++degree) - if (normIminusT <= maxNormForPade[degree - 3]) - break; - return degree; -} - -template<typename MatrixType> -inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(long double normIminusT) -{ -#if LDBL_MANT_DIG == 53 - const int maxPadeDegree = 7; - const double maxNormForPade[] = { 1.884160592658218e-2L /* degree = 3 */ , 6.038881904059573e-2L, 1.239917516308172e-1L, - 1.999045567181744e-1L, 2.789358995219730e-1L }; -#elif LDBL_MANT_DIG <= 64 - const 
int maxPadeDegree = 8; - const long double maxNormForPade[] = { 6.3854693117491799460e-3L /* degree = 3 */ , 2.6394893435456973676e-2L, - 6.4216043030404063729e-2L, 1.1701165502926694307e-1L, 1.7904284231268670284e-1L, 2.4471944416607995472e-1L }; -#elif LDBL_MANT_DIG <= 106 - const int maxPadeDegree = 10; - const double maxNormForPade[] = { 1.0007161601787493236741409687186e-4L /* degree = 3 */ , - 1.0007161601787493236741409687186e-3L, 4.7069769360887572939882574746264e-3L, 1.3220386624169159689406653101695e-2L, - 2.8063482381631737920612944054906e-2L, 4.9625993951953473052385361085058e-2L, 7.7367040706027886224557538328171e-2L, - 1.1016843812851143391275867258512e-1L }; -#else - const int maxPadeDegree = 10; - const double maxNormForPade[] = { 5.524506147036624377378713555116378e-5L /* degree = 3 */ , - 6.640600568157479679823602193345995e-4L, 3.227716520106894279249709728084626e-3L, - 9.619593944683432960546978734646284e-3L, 2.134595382433742403911124458161147e-2L, - 3.908166513900489428442993794761185e-2L, 6.266780814639442865832535460550138e-2L, - 9.134603732914548552537150753385375e-2L }; -#endif - int degree = 3; - for (; degree <= maxPadeDegree; ++degree) - if (normIminusT <= maxNormForPade[degree - 3]) - break; - return degree; -} - -template<typename MatrixType> -inline typename MatrixPowerAtomic<MatrixType>::ComplexScalar -MatrixPowerAtomic<MatrixType>::computeSuperDiag(const ComplexScalar& curr, const ComplexScalar& prev, RealScalar p) -{ - using std::ceil; - using std::exp; - using std::log; - using std::sinh; - - ComplexScalar logCurr = log(curr); - ComplexScalar logPrev = log(prev); - int unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)); - ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, EIGEN_PI*unwindingNumber); - return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev); -} - -template<typename MatrixType> -inline typename MatrixPowerAtomic<MatrixType>::RealScalar -MatrixPowerAtomic<MatrixType>::computeSuperDiag(RealScalar curr, RealScalar prev, RealScalar p) -{ - using std::exp; - using std::log; - using std::sinh; - - RealScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2); - return 2 * exp(p * (log(curr) + log(prev)) / 2) * sinh(p * w) / (curr - prev); -} - -/** - * \ingroup MatrixFunctions_Module - * - * \brief Class for computing matrix powers. - * - * \tparam MatrixType type of the base, expected to be an instantiation - * of the Matrix class template. - * - * This class is capable of computing real/complex matrices raised to - * an arbitrary real power. Meanwhile, it saves the result of Schur - * decomposition if an non-integral power has even been calculated. - * Therefore, if you want to compute multiple (>= 2) matrix powers - * for the same matrix, using the class directly is more efficient than - * calling MatrixBase::pow(). - * - * Example: - * \include MatrixPower_optimal.cpp - * Output: \verbinclude MatrixPower_optimal.out - */ -template<typename MatrixType> -class MatrixPower : internal::noncopyable -{ - private: - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - - public: - /** - * \brief Constructor. - * - * \param[in] A the base of the matrix power. - * - * The class stores a reference to A, so it should not be changed - * (or destroyed) before evaluation. 
- */ - explicit MatrixPower(const MatrixType& A) : - m_A(A), - m_conditionNumber(0), - m_rank(A.cols()), - m_nulls(0) - { eigen_assert(A.rows() == A.cols()); } - - /** - * \brief Returns the matrix power. - * - * \param[in] p exponent, a real scalar. - * \return The expression \f$ A^p \f$, where A is specified in the - * constructor. - */ - const MatrixPowerParenthesesReturnValue<MatrixType> operator()(RealScalar p) - { return MatrixPowerParenthesesReturnValue<MatrixType>(*this, p); } - - /** - * \brief Compute the matrix power. - * - * \param[in] p exponent, a real scalar. - * \param[out] res \f$ A^p \f$ where A is specified in the - * constructor. - */ - template<typename ResultType> - void compute(ResultType& res, RealScalar p); - - Index rows() const { return m_A.rows(); } - Index cols() const { return m_A.cols(); } - - private: - typedef std::complex<RealScalar> ComplexScalar; - typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, - MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> ComplexMatrix; - - /** \brief Reference to the base of matrix power. */ - typename MatrixType::Nested m_A; - - /** \brief Temporary storage. */ - MatrixType m_tmp; - - /** \brief Store the result of Schur decomposition. */ - ComplexMatrix m_T, m_U; - - /** \brief Store fractional power of m_T. */ - ComplexMatrix m_fT; - - /** - * \brief Condition number of m_A. - * - * It is initialized as 0 to avoid performing unnecessary Schur - * decomposition, which is the bottleneck. - */ - RealScalar m_conditionNumber; - - /** \brief Rank of m_A. */ - Index m_rank; - - /** \brief Rank deficiency of m_A. */ - Index m_nulls; - - /** - * \brief Split p into integral part and fractional part. - * - * \param[in] p The exponent. - * \param[out] p The fractional part ranging in \f$ (-1, 1) \f$. - * \param[out] intpart The integral part. - * - * Only if the fractional part is nonzero, it calls initialize(). - */ - void split(RealScalar& p, RealScalar& intpart); - - /** \brief Perform Schur decomposition for fractional power. */ - void initialize(); - - template<typename ResultType> - void computeIntPower(ResultType& res, RealScalar p); - - template<typename ResultType> - void computeFracPower(ResultType& res, RealScalar p); - - template<int Rows, int Cols, int Options, int MaxRows, int MaxCols> - static void revertSchur( - Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols>& res, - const ComplexMatrix& T, - const ComplexMatrix& U); - - template<int Rows, int Cols, int Options, int MaxRows, int MaxCols> - static void revertSchur( - Matrix<RealScalar, Rows, Cols, Options, MaxRows, MaxCols>& res, - const ComplexMatrix& T, - const ComplexMatrix& U); -}; - -template<typename MatrixType> -template<typename ResultType> -void MatrixPower<MatrixType>::compute(ResultType& res, RealScalar p) -{ - using std::pow; - switch (cols()) { - case 0: - break; - case 1: - res(0,0) = pow(m_A.coeff(0,0), p); - break; - default: - RealScalar intpart; - split(p, intpart); - - res = MatrixType::Identity(rows(), cols()); - computeIntPower(res, intpart); - if (p) computeFracPower(res, p); - } -} - -template<typename MatrixType> -void MatrixPower<MatrixType>::split(RealScalar& p, RealScalar& intpart) -{ - using std::floor; - using std::pow; - - intpart = floor(p); - p -= intpart; - - // Perform Schur decomposition if it is not yet performed and the power is - // not an integer. - if (!m_conditionNumber && p) - initialize(); - - // Choose the more stable of intpart = floor(p) and intpart = ceil(p). 
- if (p > RealScalar(0.5) && p > (1-p) * pow(m_conditionNumber, p)) { - --p; - ++intpart; - } -} - -template<typename MatrixType> -void MatrixPower<MatrixType>::initialize() -{ - const ComplexSchur<MatrixType> schurOfA(m_A); - JacobiRotation<ComplexScalar> rot; - ComplexScalar eigenvalue; - - m_fT.resizeLike(m_A); - m_T = schurOfA.matrixT(); - m_U = schurOfA.matrixU(); - m_conditionNumber = m_T.diagonal().array().abs().maxCoeff() / m_T.diagonal().array().abs().minCoeff(); - - // Move zero eigenvalues to the bottom right corner. - for (Index i = cols()-1; i>=0; --i) { - if (m_rank <= 2) - return; - if (m_T.coeff(i,i) == RealScalar(0)) { - for (Index j=i+1; j < m_rank; ++j) { - eigenvalue = m_T.coeff(j,j); - rot.makeGivens(m_T.coeff(j-1,j), eigenvalue); - m_T.applyOnTheRight(j-1, j, rot); - m_T.applyOnTheLeft(j-1, j, rot.adjoint()); - m_T.coeffRef(j-1,j-1) = eigenvalue; - m_T.coeffRef(j,j) = RealScalar(0); - m_U.applyOnTheRight(j-1, j, rot); - } - --m_rank; - } - } - - m_nulls = rows() - m_rank; - if (m_nulls) { - eigen_assert(m_T.bottomRightCorner(m_nulls, m_nulls).isZero() - && "Base of matrix power should be invertible or with a semisimple zero eigenvalue."); - m_fT.bottomRows(m_nulls).fill(RealScalar(0)); - } -} - -template<typename MatrixType> -template<typename ResultType> -void MatrixPower<MatrixType>::computeIntPower(ResultType& res, RealScalar p) -{ - using std::abs; - using std::fmod; - RealScalar pp = abs(p); - - if (p<0) - m_tmp = m_A.inverse(); - else - m_tmp = m_A; - - while (true) { - if (fmod(pp, 2) >= 1) - res = m_tmp * res; - pp /= 2; - if (pp < 1) - break; - m_tmp *= m_tmp; - } -} - -template<typename MatrixType> -template<typename ResultType> -void MatrixPower<MatrixType>::computeFracPower(ResultType& res, RealScalar p) -{ - Block<ComplexMatrix,Dynamic,Dynamic> blockTp(m_fT, 0, 0, m_rank, m_rank); - eigen_assert(m_conditionNumber); - eigen_assert(m_rank + m_nulls == rows()); - - MatrixPowerAtomic<ComplexMatrix>(m_T.topLeftCorner(m_rank, m_rank), p).compute(blockTp); - if (m_nulls) { - m_fT.topRightCorner(m_rank, m_nulls) = m_T.topLeftCorner(m_rank, m_rank).template triangularView<Upper>() - .solve(blockTp * m_T.topRightCorner(m_rank, m_nulls)); - } - revertSchur(m_tmp, m_fT, m_U); - res = m_tmp * res; -} - -template<typename MatrixType> -template<int Rows, int Cols, int Options, int MaxRows, int MaxCols> -inline void MatrixPower<MatrixType>::revertSchur( - Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols>& res, - const ComplexMatrix& T, - const ComplexMatrix& U) -{ res.noalias() = U * (T.template triangularView<Upper>() * U.adjoint()); } - -template<typename MatrixType> -template<int Rows, int Cols, int Options, int MaxRows, int MaxCols> -inline void MatrixPower<MatrixType>::revertSchur( - Matrix<RealScalar, Rows, Cols, Options, MaxRows, MaxCols>& res, - const ComplexMatrix& T, - const ComplexMatrix& U) -{ res.noalias() = (U * (T.template triangularView<Upper>() * U.adjoint())).real(); } - -/** - * \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix power of some matrix (expression). - * - * \tparam Derived type of the base, a matrix (expression). - * - * This class holds the arguments to the matrix power until it is - * assigned or evaluated for some other reason (so the argument - * should not be changed in the meantime). It is the return type of - * MatrixBase::pow() and related functions and most of the - * time this is the only way it is used. 
- */ -template<typename Derived> -class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue<Derived> > -{ - public: - typedef typename Derived::PlainObject PlainObject; - typedef typename Derived::RealScalar RealScalar; - typedef typename Derived::Index Index; - - /** - * \brief Constructor. - * - * \param[in] A %Matrix (expression), the base of the matrix power. - * \param[in] p real scalar, the exponent of the matrix power. - */ - MatrixPowerReturnValue(const Derived& A, RealScalar p) : m_A(A), m_p(p) - { } - - /** - * \brief Compute the matrix power. - * - * \param[out] result \f$ A^p \f$ where \p A and \p p are as in the - * constructor. - */ - template<typename ResultType> - inline void evalTo(ResultType& result) const - { MatrixPower<PlainObject>(m_A.eval()).compute(result, m_p); } - - Index rows() const { return m_A.rows(); } - Index cols() const { return m_A.cols(); } - - private: - const Derived& m_A; - const RealScalar m_p; -}; - -/** - * \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix power of some matrix (expression). - * - * \tparam Derived type of the base, a matrix (expression). - * - * This class holds the arguments to the matrix power until it is - * assigned or evaluated for some other reason (so the argument - * should not be changed in the meantime). It is the return type of - * MatrixBase::pow() and related functions and most of the - * time this is the only way it is used. - */ -template<typename Derived> -class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerReturnValue<Derived> > -{ - public: - typedef typename Derived::PlainObject PlainObject; - typedef typename std::complex<typename Derived::RealScalar> ComplexScalar; - typedef typename Derived::Index Index; - - /** - * \brief Constructor. - * - * \param[in] A %Matrix (expression), the base of the matrix power. - * \param[in] p complex scalar, the exponent of the matrix power. - */ - MatrixComplexPowerReturnValue(const Derived& A, const ComplexScalar& p) : m_A(A), m_p(p) - { } - - /** - * \brief Compute the matrix power. - * - * Because \p p is complex, \f$ A^p \f$ is simply evaluated as \f$ - * \exp(p \log(A)) \f$. - * - * \param[out] result \f$ A^p \f$ where \p A and \p p are as in the - * constructor. 
- */ - template<typename ResultType> - inline void evalTo(ResultType& result) const - { result = (m_p * m_A.log()).exp(); } - - Index rows() const { return m_A.rows(); } - Index cols() const { return m_A.cols(); } - - private: - const Derived& m_A; - const ComplexScalar m_p; -}; - -namespace internal { - -template<typename MatrixPowerType> -struct traits< MatrixPowerParenthesesReturnValue<MatrixPowerType> > -{ typedef typename MatrixPowerType::PlainObject ReturnType; }; - -template<typename Derived> -struct traits< MatrixPowerReturnValue<Derived> > -{ typedef typename Derived::PlainObject ReturnType; }; - -template<typename Derived> -struct traits< MatrixComplexPowerReturnValue<Derived> > -{ typedef typename Derived::PlainObject ReturnType; }; - -} - -template<typename Derived> -const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(const RealScalar& p) const -{ return MatrixPowerReturnValue<Derived>(derived(), p); } - -template<typename Derived> -const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const -{ return MatrixComplexPowerReturnValue<Derived>(derived(), p); } - -} // namespace Eigen - -#endif // EIGEN_MATRIX_POWER diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h deleted file mode 100644 index 2e5abda..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h +++ /dev/null @@ -1,366 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MATRIX_SQUARE_ROOT -#define EIGEN_MATRIX_SQUARE_ROOT - -namespace Eigen { - -namespace internal { - -// pre: T.block(i,i,2,2) has complex conjugate eigenvalues -// post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2) -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT) -{ - // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere - // in EigenSolver. If we expose it, we could call it directly from here. 
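  // The 2x2 diagonal block has a pair of complex conjugate eigenvalues, so its square root
  // is obtained by diagonalizing it with EigenSolver, taking the principal square roots of
  // the eigenvalues, and transforming back; the imaginary parts cancel and the real part of
  // the product is written into the corresponding block of sqrtT.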
- typedef typename traits<MatrixType>::Scalar Scalar; - Matrix<Scalar,2,2> block = T.template block<2,2>(i,i); - EigenSolver<Matrix<Scalar,2,2> > es(block); - sqrtT.template block<2,2>(i,i) - = (es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() * es.eigenvectors().inverse()).real(); -} - -// pre: block structure of T is such that (i,j) is a 1x1 block, -// all blocks of sqrtT to left of and below (i,j) are correct -// post: sqrtT(i,j) has the correct value -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT) -{ - typedef typename traits<MatrixType>::Scalar Scalar; - Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value(); - sqrtT.coeffRef(i,j) = (T.coeff(i,j) - tmp) / (sqrtT.coeff(i,i) + sqrtT.coeff(j,j)); -} - -// similar to compute1x1offDiagonalBlock() -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT) -{ - typedef typename traits<MatrixType>::Scalar Scalar; - Matrix<Scalar,1,2> rhs = T.template block<1,2>(i,j); - if (j-i > 1) - rhs -= sqrtT.block(i, i+1, 1, j-i-1) * sqrtT.block(i+1, j, j-i-1, 2); - Matrix<Scalar,2,2> A = sqrtT.coeff(i,i) * Matrix<Scalar,2,2>::Identity(); - A += sqrtT.template block<2,2>(j,j).transpose(); - sqrtT.template block<1,2>(i,j).transpose() = A.fullPivLu().solve(rhs.transpose()); -} - -// similar to compute1x1offDiagonalBlock() -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT) -{ - typedef typename traits<MatrixType>::Scalar Scalar; - Matrix<Scalar,2,1> rhs = T.template block<2,1>(i,j); - if (j-i > 2) - rhs -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 1); - Matrix<Scalar,2,2> A = sqrtT.coeff(j,j) * Matrix<Scalar,2,2>::Identity(); - A += sqrtT.template block<2,2>(i,i); - sqrtT.template block<2,1>(i,j) = A.fullPivLu().solve(rhs); -} - -// solves the equation A X + X B = C where all matrices are 2-by-2 -template <typename MatrixType> -void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const MatrixType& A, const MatrixType& B, const MatrixType& C) -{ - typedef typename traits<MatrixType>::Scalar Scalar; - Matrix<Scalar,4,4> coeffMatrix = Matrix<Scalar,4,4>::Zero(); - coeffMatrix.coeffRef(0,0) = A.coeff(0,0) + B.coeff(0,0); - coeffMatrix.coeffRef(1,1) = A.coeff(0,0) + B.coeff(1,1); - coeffMatrix.coeffRef(2,2) = A.coeff(1,1) + B.coeff(0,0); - coeffMatrix.coeffRef(3,3) = A.coeff(1,1) + B.coeff(1,1); - coeffMatrix.coeffRef(0,1) = B.coeff(1,0); - coeffMatrix.coeffRef(0,2) = A.coeff(0,1); - coeffMatrix.coeffRef(1,0) = B.coeff(0,1); - coeffMatrix.coeffRef(1,3) = A.coeff(0,1); - coeffMatrix.coeffRef(2,0) = A.coeff(1,0); - coeffMatrix.coeffRef(2,3) = B.coeff(1,0); - coeffMatrix.coeffRef(3,1) = A.coeff(1,0); - coeffMatrix.coeffRef(3,2) = B.coeff(0,1); - - Matrix<Scalar,4,1> rhs; - rhs.coeffRef(0) = C.coeff(0,0); - rhs.coeffRef(1) = C.coeff(0,1); - rhs.coeffRef(2) = C.coeff(1,0); - rhs.coeffRef(3) = C.coeff(1,1); - - Matrix<Scalar,4,1> result; - result = coeffMatrix.fullPivLu().solve(rhs); - - X.coeffRef(0,0) = result.coeff(0); - X.coeffRef(0,1) = result.coeff(1); - X.coeffRef(1,0) = result.coeff(2); - X.coeffRef(1,1) = 
result.coeff(3); -} - -// similar to compute1x1offDiagonalBlock() -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT) -{ - typedef typename traits<MatrixType>::Scalar Scalar; - Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i); - Matrix<Scalar,2,2> B = sqrtT.template block<2,2>(j,j); - Matrix<Scalar,2,2> C = T.template block<2,2>(i,j); - if (j-i > 2) - C -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 2); - Matrix<Scalar,2,2> X; - matrix_sqrt_quasi_triangular_solve_auxiliary_equation(X, A, B, C); - sqrtT.template block<2,2>(i,j) = X; -} - -// pre: T is quasi-upper-triangular and sqrtT is a zero matrix of the same size -// post: the diagonal blocks of sqrtT are the square roots of the diagonal blocks of T -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT) -{ - using std::sqrt; - const Index size = T.rows(); - for (Index i = 0; i < size; i++) { - if (i == size - 1 || T.coeff(i+1, i) == 0) { - eigen_assert(T(i,i) >= 0); - sqrtT.coeffRef(i,i) = sqrt(T.coeff(i,i)); - } - else { - matrix_sqrt_quasi_triangular_2x2_diagonal_block(T, i, sqrtT); - ++i; - } - } -} - -// pre: T is quasi-upper-triangular and diagonal blocks of sqrtT are square root of diagonal blocks of T. -// post: sqrtT is the square root of T. -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT) -{ - const Index size = T.rows(); - for (Index j = 1; j < size; j++) { - if (T.coeff(j, j-1) != 0) // if T(j-1:j, j-1:j) is a 2-by-2 block - continue; - for (Index i = j-1; i >= 0; i--) { - if (i > 0 && T.coeff(i, i-1) != 0) // if T(i-1:i, i-1:i) is a 2-by-2 block - continue; - bool iBlockIs2x2 = (i < size - 1) && (T.coeff(i+1, i) != 0); - bool jBlockIs2x2 = (j < size - 1) && (T.coeff(j+1, j) != 0); - if (iBlockIs2x2 && jBlockIs2x2) - matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(T, i, j, sqrtT); - else if (iBlockIs2x2 && !jBlockIs2x2) - matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(T, i, j, sqrtT); - else if (!iBlockIs2x2 && jBlockIs2x2) - matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(T, i, j, sqrtT); - else if (!iBlockIs2x2 && !jBlockIs2x2) - matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(T, i, j, sqrtT); - } - } -} - -} // end of namespace internal - -/** \ingroup MatrixFunctions_Module - * \brief Compute matrix square root of quasi-triangular matrix. - * - * \tparam MatrixType type of \p arg, the argument of matrix square root, - * expected to be an instantiation of the Matrix class template. - * \tparam ResultType type of \p result, where result is to be stored. - * \param[in] arg argument of matrix square root. - * \param[out] result matrix square root of upper Hessenberg part of \p arg. - * - * This function computes the square root of the upper quasi-triangular matrix stored in the upper - * Hessenberg part of \p arg. Only the upper Hessenberg part of \p result is updated, the rest is - * not touched. See MatrixBase::sqrt() for details on how this computation is implemented. 
- * - * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular - */ -template <typename MatrixType, typename ResultType> -void matrix_sqrt_quasi_triangular(const MatrixType &arg, ResultType &result) -{ - eigen_assert(arg.rows() == arg.cols()); - result.resize(arg.rows(), arg.cols()); - internal::matrix_sqrt_quasi_triangular_diagonal(arg, result); - internal::matrix_sqrt_quasi_triangular_off_diagonal(arg, result); -} - - -/** \ingroup MatrixFunctions_Module - * \brief Compute matrix square root of triangular matrix. - * - * \tparam MatrixType type of \p arg, the argument of matrix square root, - * expected to be an instantiation of the Matrix class template. - * \tparam ResultType type of \p result, where result is to be stored. - * \param[in] arg argument of matrix square root. - * \param[out] result matrix square root of upper triangular part of \p arg. - * - * Only the upper triangular part (including the diagonal) of \p result is updated, the rest is not - * touched. See MatrixBase::sqrt() for details on how this computation is implemented. - * - * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular - */ -template <typename MatrixType, typename ResultType> -void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result) -{ - using std::sqrt; - typedef typename MatrixType::Scalar Scalar; - - eigen_assert(arg.rows() == arg.cols()); - - // Compute square root of arg and store it in upper triangular part of result - // This uses that the square root of triangular matrices can be computed directly. - result.resize(arg.rows(), arg.cols()); - for (Index i = 0; i < arg.rows(); i++) { - result.coeffRef(i,i) = sqrt(arg.coeff(i,i)); - } - for (Index j = 1; j < arg.cols(); j++) { - for (Index i = j-1; i >= 0; i--) { - // if i = j-1, then segment has length 0 so tmp = 0 - Scalar tmp = (result.row(i).segment(i+1,j-i-1) * result.col(j).segment(i+1,j-i-1)).value(); - // denominator may be zero if original matrix is singular - result.coeffRef(i,j) = (arg.coeff(i,j) - tmp) / (result.coeff(i,i) + result.coeff(j,j)); - } - } -} - - -namespace internal { - -/** \ingroup MatrixFunctions_Module - * \brief Helper struct for computing matrix square roots of general matrices. - * \tparam MatrixType type of the argument of the matrix square root, - * expected to be an instantiation of the Matrix class template. - * - * \sa MatrixSquareRootTriangular, MatrixSquareRootQuasiTriangular, MatrixBase::sqrt() - */ -template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex> -struct matrix_sqrt_compute -{ - /** \brief Compute the matrix square root - * - * \param[in] arg matrix whose square root is to be computed. - * \param[out] result square root of \p arg. - * - * See MatrixBase::sqrt() for details on how this computation is implemented. 
- */ - template <typename ResultType> static void run(const MatrixType &arg, ResultType &result); -}; - - -// ********** Partial specialization for real matrices ********** - -template <typename MatrixType> -struct matrix_sqrt_compute<MatrixType, 0> -{ - template <typename ResultType> - static void run(const MatrixType &arg, ResultType &result) - { - eigen_assert(arg.rows() == arg.cols()); - - // Compute Schur decomposition of arg - const RealSchur<MatrixType> schurOfA(arg); - const MatrixType& T = schurOfA.matrixT(); - const MatrixType& U = schurOfA.matrixU(); - - // Compute square root of T - MatrixType sqrtT = MatrixType::Zero(arg.rows(), arg.cols()); - matrix_sqrt_quasi_triangular(T, sqrtT); - - // Compute square root of arg - result = U * sqrtT * U.adjoint(); - } -}; - - -// ********** Partial specialization for complex matrices ********** - -template <typename MatrixType> -struct matrix_sqrt_compute<MatrixType, 1> -{ - template <typename ResultType> - static void run(const MatrixType &arg, ResultType &result) - { - eigen_assert(arg.rows() == arg.cols()); - - // Compute Schur decomposition of arg - const ComplexSchur<MatrixType> schurOfA(arg); - const MatrixType& T = schurOfA.matrixT(); - const MatrixType& U = schurOfA.matrixU(); - - // Compute square root of T - MatrixType sqrtT; - matrix_sqrt_triangular(T, sqrtT); - - // Compute square root of arg - result = U * (sqrtT.template triangularView<Upper>() * U.adjoint()); - } -}; - -} // end namespace internal - -/** \ingroup MatrixFunctions_Module - * - * \brief Proxy for the matrix square root of some matrix (expression). - * - * \tparam Derived Type of the argument to the matrix square root. - * - * This class holds the argument to the matrix square root until it - * is assigned or evaluated for some other reason (so the argument - * should not be changed in the meantime). It is the return type of - * MatrixBase::sqrt() and most of the time this is the only way it is - * used. - */ -template<typename Derived> class MatrixSquareRootReturnValue -: public ReturnByValue<MatrixSquareRootReturnValue<Derived> > -{ - protected: - typedef typename internal::ref_selector<Derived>::type DerivedNested; - - public: - /** \brief Constructor. - * - * \param[in] src %Matrix (expression) forming the argument of the - * matrix square root. - */ - explicit MatrixSquareRootReturnValue(const Derived& src) : m_src(src) { } - - /** \brief Compute the matrix square root. - * - * \param[out] result the matrix square root of \p src in the - * constructor. 
- */ - template <typename ResultType> - inline void evalTo(ResultType& result) const - { - typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType; - typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean; - DerivedEvalType tmp(m_src); - internal::matrix_sqrt_compute<DerivedEvalTypeClean>::run(tmp, result); - } - - Index rows() const { return m_src.rows(); } - Index cols() const { return m_src.cols(); } - - protected: - const DerivedNested m_src; -}; - -namespace internal { -template<typename Derived> -struct traits<MatrixSquareRootReturnValue<Derived> > -{ - typedef typename Derived::PlainObject ReturnType; -}; -} - -template <typename Derived> -const MatrixSquareRootReturnValue<Derived> MatrixBase<Derived>::sqrt() const -{ - eigen_assert(rows() == cols()); - return MatrixSquareRootReturnValue<Derived>(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_MATRIX_FUNCTION diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h b/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h deleted file mode 100644 index 7604df9..0000000 --- a/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STEM_FUNCTION -#define EIGEN_STEM_FUNCTION - -namespace Eigen { - -namespace internal { - -/** \brief The exponential function (and its derivatives). */ -template <typename Scalar> -Scalar stem_function_exp(Scalar x, int) -{ - using std::exp; - return exp(x); -} - -/** \brief Cosine (and its derivatives). */ -template <typename Scalar> -Scalar stem_function_cos(Scalar x, int n) -{ - using std::cos; - using std::sin; - Scalar res; - - switch (n % 4) { - case 0: - res = std::cos(x); - break; - case 1: - res = -std::sin(x); - break; - case 2: - res = -std::cos(x); - break; - case 3: - res = std::sin(x); - break; - } - return res; -} - -/** \brief Sine (and its derivatives). */ -template <typename Scalar> -Scalar stem_function_sin(Scalar x, int n) -{ - using std::cos; - using std::sin; - Scalar res; - - switch (n % 4) { - case 0: - res = std::sin(x); - break; - case 1: - res = std::cos(x); - break; - case 2: - res = -std::sin(x); - break; - case 3: - res = -std::cos(x); - break; - } - return res; -} - -/** \brief Hyperbolic cosine (and its derivatives). */ -template <typename Scalar> -Scalar stem_function_cosh(Scalar x, int n) -{ - using std::cosh; - using std::sinh; - Scalar res; - - switch (n % 2) { - case 0: - res = std::cosh(x); - break; - case 1: - res = std::sinh(x); - break; - } - return res; -} - -/** \brief Hyperbolic sine (and its derivatives). 
*/ -template <typename Scalar> -Scalar stem_function_sinh(Scalar x, int n) -{ - using std::cosh; - using std::sinh; - Scalar res; - - switch (n % 2) { - case 0: - res = std::sinh(x); - break; - case 1: - res = std::cosh(x); - break; - } - return res; -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_STEM_FUNCTION diff --git a/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h b/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h deleted file mode 100644 index 63cb28d..0000000 --- a/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> -// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H -#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H - -namespace Eigen { - -namespace internal { - -/** \internal \returns the arcsin of \a a (coeff-wise) */ -template<typename Packet> inline static Packet pasin(Packet a) { return std::asin(a); } - -#ifdef EIGEN_VECTORIZE_SSE - -template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x) -{ - _EIGEN_DECLARE_CONST_Packet4f(half, 0.5); - _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5); - _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5); - - _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); - - _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654); - _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5); - - _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2); - _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2); - _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2); - _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2); - _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1); - - Packet4f a = pabs(x);//got the absolute value - - Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit - - Packet4f z1,z2;//will need them during computation - - -//will compute the two branches for asin -//so first compare with half - - Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take -//both will be taken, and finally results will be merged -//the branch for values >0.5 - - { -//the core series expansion - z1=pmadd(p4f_minus_half,a,p4f_half); - Packet4f x1=psqrt(z1); - Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2); - Packet4f s2=pmadd(s1, z1, p4f_asin3); - Packet4f s3=pmadd(s2,z1, p4f_asin4); - Packet4f s4=pmadd(s3,z1, p4f_asin5); - Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd - z1=pmadd(temp,x1,x1); - z1=padd(z1,z1); - z1=psub(p4f_pi_over_2,z1); - } - - { -//the core series expansion - Packet4f x2=a; - z2=pmul(x2,x2); - Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2); - Packet4f s2=pmadd(s1, z2, p4f_asin3); - Packet4f s3=pmadd(s2,z2, p4f_asin4); - Packet4f s4=pmadd(s3,z2, p4f_asin5); - Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd - z2=pmadd(temp,x2,x2); - } - -/* select the correct result from the two branch evaluations */ - z1 = _mm_and_ps(branch_mask, z1); - z2 = _mm_andnot_ps(branch_mask, z2); - Packet4f z = _mm_or_ps(z1,z2); - -/* update the sign */ - return _mm_xor_ps(z, sign_bit); -} - -#endif // EIGEN_VECTORIZE_SSE 
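// Scalar reference model of the branch structure used by the vectorized pasin above
// (an illustrative sketch, not part of the original kernel): inputs with |x| > 0.5 go
// through the identity asin(x) = pi/2 - 2*asin(sqrt((1 - |x|)/2)), smaller inputs are
// evaluated directly (the SSE path uses a short polynomial in x^2 here), and the sign
// of x is restored at the end. <cmath> is assumed to be available, as the generic
// pasin overload above already relies on std::asin.
inline float pasin_scalar_reference(float x)
{
  const float a = std::fabs(x);                  // work on |x|, remember the sign of x
  float r;
  if (a > 0.5f) {
    const float z = 0.5f * (1.0f - a);           // (1 - |x|)/2, as in the branch for large inputs
    r = 1.5707963267948966f - 2.0f * std::asin(std::sqrt(z));
  } else {
    r = std::asin(a);                            // small-input branch
  }
  return x < 0.0f ? -r : r;                      // restore the sign
}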
- -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h deleted file mode 100644 index 8fe3ed8..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h +++ /dev/null @@ -1,601 +0,0 @@ -// -*- coding: utf-8 -// vim: set fileencoding=utf-8 - -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_HYBRIDNONLINEARSOLVER_H -#define EIGEN_HYBRIDNONLINEARSOLVER_H - -namespace Eigen { - -namespace HybridNonLinearSolverSpace { - enum Status { - Running = -1, - ImproperInputParameters = 0, - RelativeErrorTooSmall = 1, - TooManyFunctionEvaluation = 2, - TolTooSmall = 3, - NotMakingProgressJacobian = 4, - NotMakingProgressIterations = 5, - UserAsked = 6 - }; -} - -/** - * \ingroup NonLinearOptimization_Module - * \brief Finds a zero of a system of n - * nonlinear functions in n variables by a modification of the Powell - * hybrid method ("dogleg"). - * - * The user must provide a subroutine which calculates the - * functions. The Jacobian is either provided by the user, or approximated - * using a forward-difference method. - * - */ -template<typename FunctorType, typename Scalar=double> -class HybridNonLinearSolver -{ -public: - typedef DenseIndex Index; - - HybridNonLinearSolver(FunctorType &_functor) - : functor(_functor) { nfev=njev=iter = 0; fnorm= 0.; useExternalScaling=false;} - - struct Parameters { - Parameters() - : factor(Scalar(100.)) - , maxfev(1000) - , xtol(std::sqrt(NumTraits<Scalar>::epsilon())) - , nb_of_subdiagonals(-1) - , nb_of_superdiagonals(-1) - , epsfcn(Scalar(0.)) {} - Scalar factor; - Index maxfev; // maximum number of function evaluation - Scalar xtol; - Index nb_of_subdiagonals; - Index nb_of_superdiagonals; - Scalar epsfcn; - }; - typedef Matrix< Scalar, Dynamic, 1 > FVectorType; - typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType; - /* TODO: if eigen provides a triangular storage, use it here */ - typedef Matrix< Scalar, Dynamic, Dynamic > UpperTriangularType; - - HybridNonLinearSolverSpace::Status hybrj1( - FVectorType &x, - const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) - ); - - HybridNonLinearSolverSpace::Status solveInit(FVectorType &x); - HybridNonLinearSolverSpace::Status solveOneStep(FVectorType &x); - HybridNonLinearSolverSpace::Status solve(FVectorType &x); - - HybridNonLinearSolverSpace::Status hybrd1( - FVectorType &x, - const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) - ); - - HybridNonLinearSolverSpace::Status solveNumericalDiffInit(FVectorType &x); - HybridNonLinearSolverSpace::Status solveNumericalDiffOneStep(FVectorType &x); - HybridNonLinearSolverSpace::Status solveNumericalDiff(FVectorType &x); - - void resetParameters(void) { parameters = Parameters(); } - Parameters parameters; - FVectorType fvec, qtf, diag; - JacobianType fjac; - UpperTriangularType R; - Index nfev; - Index njev; - Index iter; - Scalar fnorm; - bool useExternalScaling; -private: - FunctorType &functor; - Index n; - Scalar sum; - bool sing; - Scalar temp; - Scalar delta; - bool jeval; - 
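    // Per-iteration state used by solveOneStep(): ncsuc and ncfail count consecutive
    // successful and failed steps and drive the step bound 'delta'; pnorm and xnorm are
    // the scaled norms of the step and of the current iterate, fnorm1 is the residual
    // norm at the trial point; nslow1 and nslow2 detect lack of progress; actred and
    // prered are the actual and predicted reductions whose ratio decides whether a step
    // is accepted.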
Index ncsuc; - Scalar ratio; - Scalar pnorm, xnorm, fnorm1; - Index nslow1, nslow2; - Index ncfail; - Scalar actred, prered; - FVectorType wa1, wa2, wa3, wa4; - - HybridNonLinearSolver& operator=(const HybridNonLinearSolver&); -}; - - - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::hybrj1( - FVectorType &x, - const Scalar tol - ) -{ - n = x.size(); - - /* check the input parameters for errors. */ - if (n <= 0 || tol < 0.) - return HybridNonLinearSolverSpace::ImproperInputParameters; - - resetParameters(); - parameters.maxfev = 100*(n+1); - parameters.xtol = tol; - diag.setConstant(n, 1.); - useExternalScaling = true; - return solve(x); -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solveInit(FVectorType &x) -{ - n = x.size(); - - wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n); - fvec.resize(n); - qtf.resize(n); - fjac.resize(n, n); - if (!useExternalScaling) - diag.resize(n); - eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); - - /* Function Body */ - nfev = 0; - njev = 0; - - /* check the input parameters for errors. */ - if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0. ) - return HybridNonLinearSolverSpace::ImproperInputParameters; - if (useExternalScaling) - for (Index j = 0; j < n; ++j) - if (diag[j] <= 0.) - return HybridNonLinearSolverSpace::ImproperInputParameters; - - /* evaluate the function at the starting point */ - /* and calculate its norm. */ - nfev = 1; - if ( functor(x, fvec) < 0) - return HybridNonLinearSolverSpace::UserAsked; - fnorm = fvec.stableNorm(); - - /* initialize iteration counter and monitors. */ - iter = 1; - ncsuc = 0; - ncfail = 0; - nslow1 = 0; - nslow2 = 0; - - return HybridNonLinearSolverSpace::Running; -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(FVectorType &x) -{ - using std::abs; - - eigen_assert(x.size()==n); // check the caller is not cheating us - - Index j; - std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n); - - jeval = true; - - /* calculate the jacobian matrix. */ - if ( functor.df(x, fjac) < 0) - return HybridNonLinearSolverSpace::UserAsked; - ++njev; - - wa2 = fjac.colwise().blueNorm(); - - /* on the first iteration and if external scaling is not used, scale according */ - /* to the norms of the columns of the initial jacobian. */ - if (iter == 1) { - if (!useExternalScaling) - for (j = 0; j < n; ++j) - diag[j] = (wa2[j]==0.) ? 1. : wa2[j]; - - /* on the first iteration, calculate the norm of the scaled x */ - /* and initialize the step bound delta. */ - xnorm = diag.cwiseProduct(x).stableNorm(); - delta = parameters.factor * xnorm; - if (delta == 0.) - delta = parameters.factor; - } - - /* compute the qr factorization of the jacobian. */ - HouseholderQR<JacobianType> qrfac(fjac); // no pivoting: - - /* copy the triangular factor of the qr factorization into r. */ - R = qrfac.matrixQR(); - - /* accumulate the orthogonal factor in fjac. */ - fjac = qrfac.householderQ(); - - /* form (q transpose)*fvec and store in qtf. */ - qtf = fjac.transpose() * fvec; - - /* rescale if necessary. */ - if (!useExternalScaling) - diag = diag.cwiseMax(wa2); - - while (true) { - /* determine the direction p. 
*/ - internal::dogleg<Scalar>(R, diag, qtf, delta, wa1); - - /* store the direction p and x + p. calculate the norm of p. */ - wa1 = -wa1; - wa2 = x + wa1; - pnorm = diag.cwiseProduct(wa1).stableNorm(); - - /* on the first iteration, adjust the initial step bound. */ - if (iter == 1) - delta = (std::min)(delta,pnorm); - - /* evaluate the function at x + p and calculate its norm. */ - if ( functor(wa2, wa4) < 0) - return HybridNonLinearSolverSpace::UserAsked; - ++nfev; - fnorm1 = wa4.stableNorm(); - - /* compute the scaled actual reduction. */ - actred = -1.; - if (fnorm1 < fnorm) /* Computing 2nd power */ - actred = 1. - numext::abs2(fnorm1 / fnorm); - - /* compute the scaled predicted reduction. */ - wa3 = R.template triangularView<Upper>()*wa1 + qtf; - temp = wa3.stableNorm(); - prered = 0.; - if (temp < fnorm) /* Computing 2nd power */ - prered = 1. - numext::abs2(temp / fnorm); - - /* compute the ratio of the actual to the predicted reduction. */ - ratio = 0.; - if (prered > 0.) - ratio = actred / prered; - - /* update the step bound. */ - if (ratio < Scalar(.1)) { - ncsuc = 0; - ++ncfail; - delta = Scalar(.5) * delta; - } else { - ncfail = 0; - ++ncsuc; - if (ratio >= Scalar(.5) || ncsuc > 1) - delta = (std::max)(delta, pnorm / Scalar(.5)); - if (abs(ratio - 1.) <= Scalar(.1)) { - delta = pnorm / Scalar(.5); - } - } - - /* test for successful iteration. */ - if (ratio >= Scalar(1e-4)) { - /* successful iteration. update x, fvec, and their norms. */ - x = wa2; - wa2 = diag.cwiseProduct(x); - fvec = wa4; - xnorm = wa2.stableNorm(); - fnorm = fnorm1; - ++iter; - } - - /* determine the progress of the iteration. */ - ++nslow1; - if (actred >= Scalar(.001)) - nslow1 = 0; - if (jeval) - ++nslow2; - if (actred >= Scalar(.1)) - nslow2 = 0; - - /* test for convergence. */ - if (delta <= parameters.xtol * xnorm || fnorm == 0.) - return HybridNonLinearSolverSpace::RelativeErrorTooSmall; - - /* tests for termination and stringent tolerances. */ - if (nfev >= parameters.maxfev) - return HybridNonLinearSolverSpace::TooManyFunctionEvaluation; - if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits<Scalar>::epsilon() * xnorm) - return HybridNonLinearSolverSpace::TolTooSmall; - if (nslow2 == 5) - return HybridNonLinearSolverSpace::NotMakingProgressJacobian; - if (nslow1 == 10) - return HybridNonLinearSolverSpace::NotMakingProgressIterations; - - /* criterion for recalculating jacobian. */ - if (ncfail == 2) - break; // leave inner loop and go for the next outer loop iteration - - /* calculate the rank one modification to the jacobian */ - /* and update qtf if necessary. */ - wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm ); - wa2 = fjac.transpose() * wa4; - if (ratio >= Scalar(1e-4)) - qtf = wa2; - wa2 = (wa2-wa3)/pnorm; - - /* compute the qr factorization of the updated jacobian. 
*/ - internal::r1updt<Scalar>(R, wa1, v_givens, w_givens, wa2, wa3, &sing); - internal::r1mpyq<Scalar>(n, n, fjac.data(), v_givens, w_givens); - internal::r1mpyq<Scalar>(1, n, qtf.data(), v_givens, w_givens); - - jeval = false; - } - return HybridNonLinearSolverSpace::Running; -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solve(FVectorType &x) -{ - HybridNonLinearSolverSpace::Status status = solveInit(x); - if (status==HybridNonLinearSolverSpace::ImproperInputParameters) - return status; - while (status==HybridNonLinearSolverSpace::Running) - status = solveOneStep(x); - return status; -} - - - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::hybrd1( - FVectorType &x, - const Scalar tol - ) -{ - n = x.size(); - - /* check the input parameters for errors. */ - if (n <= 0 || tol < 0.) - return HybridNonLinearSolverSpace::ImproperInputParameters; - - resetParameters(); - parameters.maxfev = 200*(n+1); - parameters.xtol = tol; - - diag.setConstant(n, 1.); - useExternalScaling = true; - return solveNumericalDiff(x); -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(FVectorType &x) -{ - n = x.size(); - - if (parameters.nb_of_subdiagonals<0) parameters.nb_of_subdiagonals= n-1; - if (parameters.nb_of_superdiagonals<0) parameters.nb_of_superdiagonals= n-1; - - wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n); - qtf.resize(n); - fjac.resize(n, n); - fvec.resize(n); - if (!useExternalScaling) - diag.resize(n); - eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); - - /* Function Body */ - nfev = 0; - njev = 0; - - /* check the input parameters for errors. */ - if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.nb_of_subdiagonals< 0 || parameters.nb_of_superdiagonals< 0 || parameters.factor <= 0. ) - return HybridNonLinearSolverSpace::ImproperInputParameters; - if (useExternalScaling) - for (Index j = 0; j < n; ++j) - if (diag[j] <= 0.) - return HybridNonLinearSolverSpace::ImproperInputParameters; - - /* evaluate the function at the starting point */ - /* and calculate its norm. */ - nfev = 1; - if ( functor(x, fvec) < 0) - return HybridNonLinearSolverSpace::UserAsked; - fnorm = fvec.stableNorm(); - - /* initialize iteration counter and monitors. */ - iter = 1; - ncsuc = 0; - ncfail = 0; - nslow1 = 0; - nslow2 = 0; - - return HybridNonLinearSolverSpace::Running; -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(FVectorType &x) -{ - using std::sqrt; - using std::abs; - - assert(x.size()==n); // check the caller is not cheating us - - Index j; - std::vector<JacobiRotation<Scalar> > v_givens(n), w_givens(n); - - jeval = true; - if (parameters.nb_of_subdiagonals<0) parameters.nb_of_subdiagonals= n-1; - if (parameters.nb_of_superdiagonals<0) parameters.nb_of_superdiagonals= n-1; - - /* calculate the jacobian matrix. 
*/ - if (internal::fdjac1(functor, x, fvec, fjac, parameters.nb_of_subdiagonals, parameters.nb_of_superdiagonals, parameters.epsfcn) <0) - return HybridNonLinearSolverSpace::UserAsked; - nfev += (std::min)(parameters.nb_of_subdiagonals+parameters.nb_of_superdiagonals+ 1, n); - - wa2 = fjac.colwise().blueNorm(); - - /* on the first iteration and if external scaling is not used, scale according */ - /* to the norms of the columns of the initial jacobian. */ - if (iter == 1) { - if (!useExternalScaling) - for (j = 0; j < n; ++j) - diag[j] = (wa2[j]==0.) ? 1. : wa2[j]; - - /* on the first iteration, calculate the norm of the scaled x */ - /* and initialize the step bound delta. */ - xnorm = diag.cwiseProduct(x).stableNorm(); - delta = parameters.factor * xnorm; - if (delta == 0.) - delta = parameters.factor; - } - - /* compute the qr factorization of the jacobian. */ - HouseholderQR<JacobianType> qrfac(fjac); // no pivoting: - - /* copy the triangular factor of the qr factorization into r. */ - R = qrfac.matrixQR(); - - /* accumulate the orthogonal factor in fjac. */ - fjac = qrfac.householderQ(); - - /* form (q transpose)*fvec and store in qtf. */ - qtf = fjac.transpose() * fvec; - - /* rescale if necessary. */ - if (!useExternalScaling) - diag = diag.cwiseMax(wa2); - - while (true) { - /* determine the direction p. */ - internal::dogleg<Scalar>(R, diag, qtf, delta, wa1); - - /* store the direction p and x + p. calculate the norm of p. */ - wa1 = -wa1; - wa2 = x + wa1; - pnorm = diag.cwiseProduct(wa1).stableNorm(); - - /* on the first iteration, adjust the initial step bound. */ - if (iter == 1) - delta = (std::min)(delta,pnorm); - - /* evaluate the function at x + p and calculate its norm. */ - if ( functor(wa2, wa4) < 0) - return HybridNonLinearSolverSpace::UserAsked; - ++nfev; - fnorm1 = wa4.stableNorm(); - - /* compute the scaled actual reduction. */ - actred = -1.; - if (fnorm1 < fnorm) /* Computing 2nd power */ - actred = 1. - numext::abs2(fnorm1 / fnorm); - - /* compute the scaled predicted reduction. */ - wa3 = R.template triangularView<Upper>()*wa1 + qtf; - temp = wa3.stableNorm(); - prered = 0.; - if (temp < fnorm) /* Computing 2nd power */ - prered = 1. - numext::abs2(temp / fnorm); - - /* compute the ratio of the actual to the predicted reduction. */ - ratio = 0.; - if (prered > 0.) - ratio = actred / prered; - - /* update the step bound. */ - if (ratio < Scalar(.1)) { - ncsuc = 0; - ++ncfail; - delta = Scalar(.5) * delta; - } else { - ncfail = 0; - ++ncsuc; - if (ratio >= Scalar(.5) || ncsuc > 1) - delta = (std::max)(delta, pnorm / Scalar(.5)); - if (abs(ratio - 1.) <= Scalar(.1)) { - delta = pnorm / Scalar(.5); - } - } - - /* test for successful iteration. */ - if (ratio >= Scalar(1e-4)) { - /* successful iteration. update x, fvec, and their norms. */ - x = wa2; - wa2 = diag.cwiseProduct(x); - fvec = wa4; - xnorm = wa2.stableNorm(); - fnorm = fnorm1; - ++iter; - } - - /* determine the progress of the iteration. */ - ++nslow1; - if (actred >= Scalar(.001)) - nslow1 = 0; - if (jeval) - ++nslow2; - if (actred >= Scalar(.1)) - nslow2 = 0; - - /* test for convergence. */ - if (delta <= parameters.xtol * xnorm || fnorm == 0.) - return HybridNonLinearSolverSpace::RelativeErrorTooSmall; - - /* tests for termination and stringent tolerances. 
*/ - if (nfev >= parameters.maxfev) - return HybridNonLinearSolverSpace::TooManyFunctionEvaluation; - if (Scalar(.1) * (std::max)(Scalar(.1) * delta, pnorm) <= NumTraits<Scalar>::epsilon() * xnorm) - return HybridNonLinearSolverSpace::TolTooSmall; - if (nslow2 == 5) - return HybridNonLinearSolverSpace::NotMakingProgressJacobian; - if (nslow1 == 10) - return HybridNonLinearSolverSpace::NotMakingProgressIterations; - - /* criterion for recalculating jacobian. */ - if (ncfail == 2) - break; // leave inner loop and go for the next outer loop iteration - - /* calculate the rank one modification to the jacobian */ - /* and update qtf if necessary. */ - wa1 = diag.cwiseProduct( diag.cwiseProduct(wa1)/pnorm ); - wa2 = fjac.transpose() * wa4; - if (ratio >= Scalar(1e-4)) - qtf = wa2; - wa2 = (wa2-wa3)/pnorm; - - /* compute the qr factorization of the updated jacobian. */ - internal::r1updt<Scalar>(R, wa1, v_givens, w_givens, wa2, wa3, &sing); - internal::r1mpyq<Scalar>(n, n, fjac.data(), v_givens, w_givens); - internal::r1mpyq<Scalar>(1, n, qtf.data(), v_givens, w_givens); - - jeval = false; - } - return HybridNonLinearSolverSpace::Running; -} - -template<typename FunctorType, typename Scalar> -HybridNonLinearSolverSpace::Status -HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiff(FVectorType &x) -{ - HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x); - if (status==HybridNonLinearSolverSpace::ImproperInputParameters) - return status; - while (status==HybridNonLinearSolverSpace::Running) - status = solveNumericalDiffOneStep(x); - return status; -} - -} // end namespace Eigen - -#endif // EIGEN_HYBRIDNONLINEARSOLVER_H - -//vim: ai ts=4 sts=4 et sw=4 diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h deleted file mode 100644 index fe3b79c..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h +++ /dev/null @@ -1,657 +0,0 @@ -// -*- coding: utf-8 -// vim: set fileencoding=utf-8 - -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LEVENBERGMARQUARDT__H -#define EIGEN_LEVENBERGMARQUARDT__H - -namespace Eigen { - -namespace LevenbergMarquardtSpace { - enum Status { - NotStarted = -2, - Running = -1, - ImproperInputParameters = 0, - RelativeReductionTooSmall = 1, - RelativeErrorTooSmall = 2, - RelativeErrorAndReductionTooSmall = 3, - CosinusTooSmall = 4, - TooManyFunctionEvaluation = 5, - FtolTooSmall = 6, - XtolTooSmall = 7, - GtolTooSmall = 8, - UserAsked = 9 - }; -} - - - -/** - * \ingroup NonLinearOptimization_Module - * \brief Performs non linear optimization over a non-linear function, - * using a variant of the Levenberg Marquardt algorithm. - * - * Check wikipedia for more information. 
- * http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm - */ -template<typename FunctorType, typename Scalar=double> -class LevenbergMarquardt -{ - static Scalar sqrt_epsilon() - { - using std::sqrt; - return sqrt(NumTraits<Scalar>::epsilon()); - } - -public: - LevenbergMarquardt(FunctorType &_functor) - : functor(_functor) { nfev = njev = iter = 0; fnorm = gnorm = 0.; useExternalScaling=false; } - - typedef DenseIndex Index; - - struct Parameters { - Parameters() - : factor(Scalar(100.)) - , maxfev(400) - , ftol(sqrt_epsilon()) - , xtol(sqrt_epsilon()) - , gtol(Scalar(0.)) - , epsfcn(Scalar(0.)) {} - Scalar factor; - Index maxfev; // maximum number of function evaluation - Scalar ftol; - Scalar xtol; - Scalar gtol; - Scalar epsfcn; - }; - - typedef Matrix< Scalar, Dynamic, 1 > FVectorType; - typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType; - - LevenbergMarquardtSpace::Status lmder1( - FVectorType &x, - const Scalar tol = sqrt_epsilon() - ); - - LevenbergMarquardtSpace::Status minimize(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeInit(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeOneStep(FVectorType &x); - - static LevenbergMarquardtSpace::Status lmdif1( - FunctorType &functor, - FVectorType &x, - Index *nfev, - const Scalar tol = sqrt_epsilon() - ); - - LevenbergMarquardtSpace::Status lmstr1( - FVectorType &x, - const Scalar tol = sqrt_epsilon() - ); - - LevenbergMarquardtSpace::Status minimizeOptimumStorage(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeOptimumStorageInit(FVectorType &x); - LevenbergMarquardtSpace::Status minimizeOptimumStorageOneStep(FVectorType &x); - - void resetParameters(void) { parameters = Parameters(); } - - Parameters parameters; - FVectorType fvec, qtf, diag; - JacobianType fjac; - PermutationMatrix<Dynamic,Dynamic> permutation; - Index nfev; - Index njev; - Index iter; - Scalar fnorm, gnorm; - bool useExternalScaling; - - Scalar lm_param(void) { return par; } -private: - - FunctorType &functor; - Index n; - Index m; - FVectorType wa1, wa2, wa3, wa4; - - Scalar par, sum; - Scalar temp, temp1, temp2; - Scalar delta; - Scalar ratio; - Scalar pnorm, xnorm, fnorm1, actred, dirder, prered; - - LevenbergMarquardt& operator=(const LevenbergMarquardt&); -}; - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::lmder1( - FVectorType &x, - const Scalar tol - ) -{ - n = x.size(); - m = functor.values(); - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || tol < 0.) 
- return LevenbergMarquardtSpace::ImproperInputParameters; - - resetParameters(); - parameters.ftol = tol; - parameters.xtol = tol; - parameters.maxfev = 100*(n+1); - - return minimize(x); -} - - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimize(FVectorType &x) -{ - LevenbergMarquardtSpace::Status status = minimizeInit(x); - if (status==LevenbergMarquardtSpace::ImproperInputParameters) - return status; - do { - status = minimizeOneStep(x); - } while (status==LevenbergMarquardtSpace::Running); - return status; -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(FVectorType &x) -{ - n = x.size(); - m = functor.values(); - - wa1.resize(n); wa2.resize(n); wa3.resize(n); - wa4.resize(m); - fvec.resize(m); - fjac.resize(m, n); - if (!useExternalScaling) - diag.resize(n); - eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); - qtf.resize(n); - - /* Function Body */ - nfev = 0; - njev = 0; - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - if (useExternalScaling) - for (Index j = 0; j < n; ++j) - if (diag[j] <= 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - /* evaluate the function at the starting point */ - /* and calculate its norm. */ - nfev = 1; - if ( functor(x, fvec) < 0) - return LevenbergMarquardtSpace::UserAsked; - fnorm = fvec.stableNorm(); - - /* initialize levenberg-marquardt parameter and iteration counter. */ - par = 0.; - iter = 1; - - return LevenbergMarquardtSpace::NotStarted; -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType &x) -{ - using std::abs; - using std::sqrt; - - eigen_assert(x.size()==n); // check the caller is not cheating us - - /* calculate the jacobian matrix. */ - Index df_ret = functor.df(x, fjac); - if (df_ret<0) - return LevenbergMarquardtSpace::UserAsked; - if (df_ret>0) - // numerical diff, we evaluated the function df_ret times - nfev += df_ret; - else njev++; - - /* compute the qr factorization of the jacobian. */ - wa2 = fjac.colwise().blueNorm(); - ColPivHouseholderQR<JacobianType> qrfac(fjac); - fjac = qrfac.matrixQR(); - permutation = qrfac.colsPermutation(); - - /* on the first iteration and if external scaling is not used, scale according */ - /* to the norms of the columns of the initial jacobian. */ - if (iter == 1) { - if (!useExternalScaling) - for (Index j = 0; j < n; ++j) - diag[j] = (wa2[j]==0.)? 1. : wa2[j]; - - /* on the first iteration, calculate the norm of the scaled x */ - /* and initialize the step bound delta. */ - xnorm = diag.cwiseProduct(x).stableNorm(); - delta = parameters.factor * xnorm; - if (delta == 0.) - delta = parameters.factor; - } - - /* form (q transpose)*fvec and store the first n components in */ - /* qtf. */ - wa4 = fvec; - wa4.applyOnTheLeft(qrfac.householderQ().adjoint()); - qtf = wa4.head(n); - - /* compute the norm of the scaled gradient. */ - gnorm = 0.; - if (fnorm != 0.) - for (Index j = 0; j < n; ++j) - if (wa2[permutation.indices()[j]] != 0.) 
- gnorm = (std::max)(gnorm, abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]])); - - /* test for convergence of the gradient norm. */ - if (gnorm <= parameters.gtol) - return LevenbergMarquardtSpace::CosinusTooSmall; - - /* rescale if necessary. */ - if (!useExternalScaling) - diag = diag.cwiseMax(wa2); - - do { - - /* determine the levenberg-marquardt parameter. */ - internal::lmpar2<Scalar>(qrfac, diag, qtf, delta, par, wa1); - - /* store the direction p and x + p. calculate the norm of p. */ - wa1 = -wa1; - wa2 = x + wa1; - pnorm = diag.cwiseProduct(wa1).stableNorm(); - - /* on the first iteration, adjust the initial step bound. */ - if (iter == 1) - delta = (std::min)(delta,pnorm); - - /* evaluate the function at x + p and calculate its norm. */ - if ( functor(wa2, wa4) < 0) - return LevenbergMarquardtSpace::UserAsked; - ++nfev; - fnorm1 = wa4.stableNorm(); - - /* compute the scaled actual reduction. */ - actred = -1.; - if (Scalar(.1) * fnorm1 < fnorm) - actred = 1. - numext::abs2(fnorm1 / fnorm); - - /* compute the scaled predicted reduction and */ - /* the scaled directional derivative. */ - wa3 = fjac.template triangularView<Upper>() * (qrfac.colsPermutation().inverse() *wa1); - temp1 = numext::abs2(wa3.stableNorm() / fnorm); - temp2 = numext::abs2(sqrt(par) * pnorm / fnorm); - prered = temp1 + temp2 / Scalar(.5); - dirder = -(temp1 + temp2); - - /* compute the ratio of the actual to the predicted */ - /* reduction. */ - ratio = 0.; - if (prered != 0.) - ratio = actred / prered; - - /* update the step bound. */ - if (ratio <= Scalar(.25)) { - if (actred >= 0.) - temp = Scalar(.5); - if (actred < 0.) - temp = Scalar(.5) * dirder / (dirder + Scalar(.5) * actred); - if (Scalar(.1) * fnorm1 >= fnorm || temp < Scalar(.1)) - temp = Scalar(.1); - /* Computing MIN */ - delta = temp * (std::min)(delta, pnorm / Scalar(.1)); - par /= temp; - } else if (!(par != 0. && ratio < Scalar(.75))) { - delta = pnorm / Scalar(.5); - par = Scalar(.5) * par; - } - - /* test for successful iteration. */ - if (ratio >= Scalar(1e-4)) { - /* successful iteration. update x, fvec, and their norms. */ - x = wa2; - wa2 = diag.cwiseProduct(x); - fvec = wa4; - xnorm = wa2.stableNorm(); - fnorm = fnorm1; - ++iter; - } - - /* tests for convergence. */ - if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1. && delta <= parameters.xtol * xnorm) - return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; - if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1.) - return LevenbergMarquardtSpace::RelativeReductionTooSmall; - if (delta <= parameters.xtol * xnorm) - return LevenbergMarquardtSpace::RelativeErrorTooSmall; - - /* tests for termination and stringent tolerances. */ - if (nfev >= parameters.maxfev) - return LevenbergMarquardtSpace::TooManyFunctionEvaluation; - if (abs(actred) <= NumTraits<Scalar>::epsilon() && prered <= NumTraits<Scalar>::epsilon() && Scalar(.5) * ratio <= 1.) 
- return LevenbergMarquardtSpace::FtolTooSmall; - if (delta <= NumTraits<Scalar>::epsilon() * xnorm) - return LevenbergMarquardtSpace::XtolTooSmall; - if (gnorm <= NumTraits<Scalar>::epsilon()) - return LevenbergMarquardtSpace::GtolTooSmall; - - } while (ratio < Scalar(1e-4)); - - return LevenbergMarquardtSpace::Running; -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::lmstr1( - FVectorType &x, - const Scalar tol - ) -{ - n = x.size(); - m = functor.values(); - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || tol < 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - resetParameters(); - parameters.ftol = tol; - parameters.xtol = tol; - parameters.maxfev = 100*(n+1); - - return minimizeOptimumStorage(x); -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(FVectorType &x) -{ - n = x.size(); - m = functor.values(); - - wa1.resize(n); wa2.resize(n); wa3.resize(n); - wa4.resize(m); - fvec.resize(m); - // Only R is stored in fjac. Q is only used to compute 'qtf', which is - // Q.transpose()*rhs. qtf will be updated using givens rotation, - // instead of storing them in Q. - // The purpose it to only use a nxn matrix, instead of mxn here, so - // that we can handle cases where m>>n : - fjac.resize(n, n); - if (!useExternalScaling) - diag.resize(n); - eigen_assert( (!useExternalScaling || diag.size()==n) && "When useExternalScaling is set, the caller must provide a valid 'diag'"); - qtf.resize(n); - - /* Function Body */ - nfev = 0; - njev = 0; - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - if (useExternalScaling) - for (Index j = 0; j < n; ++j) - if (diag[j] <= 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - /* evaluate the function at the starting point */ - /* and calculate its norm. */ - nfev = 1; - if ( functor(x, fvec) < 0) - return LevenbergMarquardtSpace::UserAsked; - fnorm = fvec.stableNorm(); - - /* initialize levenberg-marquardt parameter and iteration counter. */ - par = 0.; - iter = 1; - - return LevenbergMarquardtSpace::NotStarted; -} - - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(FVectorType &x) -{ - using std::abs; - using std::sqrt; - - eigen_assert(x.size()==n); // check the caller is not cheating us - - Index i, j; - bool sing; - - /* compute the qr factorization of the jacobian matrix */ - /* calculated one row at a time, while simultaneously */ - /* forming (q transpose)*fvec and storing the first */ - /* n components in qtf. */ - qtf.fill(0.); - fjac.fill(0.); - Index rownb = 2; - for (i = 0; i < m; ++i) { - if (functor.df(x, wa3, rownb) < 0) return LevenbergMarquardtSpace::UserAsked; - internal::rwupdt<Scalar>(fjac, wa3, qtf, fvec[i]); - ++rownb; - } - ++njev; - - /* if the jacobian is rank deficient, call qrfac to */ - /* reorder its columns and update the components of qtf. */ - sing = false; - for (j = 0; j < n; ++j) { - if (fjac(j,j) == 0.) 
- sing = true; - wa2[j] = fjac.col(j).head(j).stableNorm(); - } - permutation.setIdentity(n); - if (sing) { - wa2 = fjac.colwise().blueNorm(); - // TODO We have no unit test covering this code path, do not modify - // until it is carefully tested - ColPivHouseholderQR<JacobianType> qrfac(fjac); - fjac = qrfac.matrixQR(); - wa1 = fjac.diagonal(); - fjac.diagonal() = qrfac.hCoeffs(); - permutation = qrfac.colsPermutation(); - // TODO : avoid this: - for(Index ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors - - for (j = 0; j < n; ++j) { - if (fjac(j,j) != 0.) { - sum = 0.; - for (i = j; i < n; ++i) - sum += fjac(i,j) * qtf[i]; - temp = -sum / fjac(j,j); - for (i = j; i < n; ++i) - qtf[i] += fjac(i,j) * temp; - } - fjac(j,j) = wa1[j]; - } - } - - /* on the first iteration and if external scaling is not used, scale according */ - /* to the norms of the columns of the initial jacobian. */ - if (iter == 1) { - if (!useExternalScaling) - for (j = 0; j < n; ++j) - diag[j] = (wa2[j]==0.)? 1. : wa2[j]; - - /* on the first iteration, calculate the norm of the scaled x */ - /* and initialize the step bound delta. */ - xnorm = diag.cwiseProduct(x).stableNorm(); - delta = parameters.factor * xnorm; - if (delta == 0.) - delta = parameters.factor; - } - - /* compute the norm of the scaled gradient. */ - gnorm = 0.; - if (fnorm != 0.) - for (j = 0; j < n; ++j) - if (wa2[permutation.indices()[j]] != 0.) - gnorm = (std::max)(gnorm, abs( fjac.col(j).head(j+1).dot(qtf.head(j+1)/fnorm) / wa2[permutation.indices()[j]])); - - /* test for convergence of the gradient norm. */ - if (gnorm <= parameters.gtol) - return LevenbergMarquardtSpace::CosinusTooSmall; - - /* rescale if necessary. */ - if (!useExternalScaling) - diag = diag.cwiseMax(wa2); - - do { - - /* determine the levenberg-marquardt parameter. */ - internal::lmpar<Scalar>(fjac, permutation.indices(), diag, qtf, delta, par, wa1); - - /* store the direction p and x + p. calculate the norm of p. */ - wa1 = -wa1; - wa2 = x + wa1; - pnorm = diag.cwiseProduct(wa1).stableNorm(); - - /* on the first iteration, adjust the initial step bound. */ - if (iter == 1) - delta = (std::min)(delta,pnorm); - - /* evaluate the function at x + p and calculate its norm. */ - if ( functor(wa2, wa4) < 0) - return LevenbergMarquardtSpace::UserAsked; - ++nfev; - fnorm1 = wa4.stableNorm(); - - /* compute the scaled actual reduction. */ - actred = -1.; - if (Scalar(.1) * fnorm1 < fnorm) - actred = 1. - numext::abs2(fnorm1 / fnorm); - - /* compute the scaled predicted reduction and */ - /* the scaled directional derivative. */ - wa3 = fjac.topLeftCorner(n,n).template triangularView<Upper>() * (permutation.inverse() * wa1); - temp1 = numext::abs2(wa3.stableNorm() / fnorm); - temp2 = numext::abs2(sqrt(par) * pnorm / fnorm); - prered = temp1 + temp2 / Scalar(.5); - dirder = -(temp1 + temp2); - - /* compute the ratio of the actual to the predicted */ - /* reduction. */ - ratio = 0.; - if (prered != 0.) - ratio = actred / prered; - - /* update the step bound. */ - if (ratio <= Scalar(.25)) { - if (actred >= 0.) - temp = Scalar(.5); - if (actred < 0.) - temp = Scalar(.5) * dirder / (dirder + Scalar(.5) * actred); - if (Scalar(.1) * fnorm1 >= fnorm || temp < Scalar(.1)) - temp = Scalar(.1); - /* Computing MIN */ - delta = temp * (std::min)(delta, pnorm / Scalar(.1)); - par /= temp; - } else if (!(par != 0. && ratio < Scalar(.75))) { - delta = pnorm / Scalar(.5); - par = Scalar(.5) * par; - } - - /* test for successful iteration. 
*/ - if (ratio >= Scalar(1e-4)) { - /* successful iteration. update x, fvec, and their norms. */ - x = wa2; - wa2 = diag.cwiseProduct(x); - fvec = wa4; - xnorm = wa2.stableNorm(); - fnorm = fnorm1; - ++iter; - } - - /* tests for convergence. */ - if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1. && delta <= parameters.xtol * xnorm) - return LevenbergMarquardtSpace::RelativeErrorAndReductionTooSmall; - if (abs(actred) <= parameters.ftol && prered <= parameters.ftol && Scalar(.5) * ratio <= 1.) - return LevenbergMarquardtSpace::RelativeReductionTooSmall; - if (delta <= parameters.xtol * xnorm) - return LevenbergMarquardtSpace::RelativeErrorTooSmall; - - /* tests for termination and stringent tolerances. */ - if (nfev >= parameters.maxfev) - return LevenbergMarquardtSpace::TooManyFunctionEvaluation; - if (abs(actred) <= NumTraits<Scalar>::epsilon() && prered <= NumTraits<Scalar>::epsilon() && Scalar(.5) * ratio <= 1.) - return LevenbergMarquardtSpace::FtolTooSmall; - if (delta <= NumTraits<Scalar>::epsilon() * xnorm) - return LevenbergMarquardtSpace::XtolTooSmall; - if (gnorm <= NumTraits<Scalar>::epsilon()) - return LevenbergMarquardtSpace::GtolTooSmall; - - } while (ratio < Scalar(1e-4)); - - return LevenbergMarquardtSpace::Running; -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorage(FVectorType &x) -{ - LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x); - if (status==LevenbergMarquardtSpace::ImproperInputParameters) - return status; - do { - status = minimizeOptimumStorageOneStep(x); - } while (status==LevenbergMarquardtSpace::Running); - return status; -} - -template<typename FunctorType, typename Scalar> -LevenbergMarquardtSpace::Status -LevenbergMarquardt<FunctorType,Scalar>::lmdif1( - FunctorType &functor, - FVectorType &x, - Index *nfev, - const Scalar tol - ) -{ - Index n = x.size(); - Index m = functor.values(); - - /* check the input parameters for errors. */ - if (n <= 0 || m < n || tol < 0.) - return LevenbergMarquardtSpace::ImproperInputParameters; - - NumericalDiff<FunctorType> numDiff(functor); - // embedded LevenbergMarquardt - LevenbergMarquardt<NumericalDiff<FunctorType>, Scalar > lm(numDiff); - lm.parameters.ftol = tol; - lm.parameters.xtol = tol; - lm.parameters.maxfev = 200*(n+1); - - LevenbergMarquardtSpace::Status info = LevenbergMarquardtSpace::Status(lm.minimize(x)); - if (nfev) - * nfev = lm.nfev; - return info; -} - -} // end namespace Eigen - -#endif // EIGEN_LEVENBERGMARQUARDT__H - -//vim: ai ts=4 sts=4 et sw=4 diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h deleted file mode 100644 index db8ff7d..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h +++ /dev/null @@ -1,66 +0,0 @@ -#define chkder_log10e 0.43429448190325182765 -#define chkder_factor 100. 
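// -------------------------------------------------------------------------
// Illustrative usage sketch (not part of this file): the two-pass MINPACK
// protocol for checking an analytic Jacobian with internal::chkder defined
// below. 'MyFunctor', its operator() and df(), and the problem sizes m and n
// are hypothetical user code introduced only for this sketch.
//
//   Eigen::VectorXd x(n);                   // user-chosen point at which the Jacobian is tested
//   Eigen::VectorXd fvec(m), fvecp(m), err(m), xp(n);
//   Eigen::MatrixXd fjac(m, n);
//   MyFunctor functor;
//
//   Eigen::internal::chkder(x, fvec, fjac, xp, fvecp, 1, err);  // mode 1: build the perturbed point xp
//   functor(x,  fvec);                      // user evaluates f at x ...
//   functor(xp, fvecp);                     // ... and at the perturbed point xp
//   functor.df(x, fjac);                    // user's analytic Jacobian at x
//   Eigen::internal::chkder(x, fvec, fjac, xp, fvecp, 2, err);  // mode 2: fill err
//
//   // err[i] near 1 : row i of fjac is consistent with f,
//   // err[i] near 0 : row i of fjac is probably wrong.
// -------------------------------------------------------------------------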
- -namespace Eigen { - -namespace internal { - -template<typename Scalar> -void chkder( - const Matrix< Scalar, Dynamic, 1 > &x, - const Matrix< Scalar, Dynamic, 1 > &fvec, - const Matrix< Scalar, Dynamic, Dynamic > &fjac, - Matrix< Scalar, Dynamic, 1 > &xp, - const Matrix< Scalar, Dynamic, 1 > &fvecp, - int mode, - Matrix< Scalar, Dynamic, 1 > &err - ) -{ - using std::sqrt; - using std::abs; - using std::log; - - typedef DenseIndex Index; - - const Scalar eps = sqrt(NumTraits<Scalar>::epsilon()); - const Scalar epsf = chkder_factor * NumTraits<Scalar>::epsilon(); - const Scalar epslog = chkder_log10e * log(eps); - Scalar temp; - - const Index m = fvec.size(), n = x.size(); - - if (mode != 2) { - /* mode = 1. */ - xp.resize(n); - for (Index j = 0; j < n; ++j) { - temp = eps * abs(x[j]); - if (temp == 0.) - temp = eps; - xp[j] = x[j] + temp; - } - } - else { - /* mode = 2. */ - err.setZero(m); - for (Index j = 0; j < n; ++j) { - temp = abs(x[j]); - if (temp == 0.) - temp = 1.; - err += temp * fjac.col(j); - } - for (Index i = 0; i < m; ++i) { - temp = 1.; - if (fvec[i] != 0. && fvecp[i] != 0. && abs(fvecp[i] - fvec[i]) >= epsf * abs(fvec[i])) - temp = eps * abs((fvecp[i] - fvec[i]) / eps - err[i]) / (abs(fvec[i]) + abs(fvecp[i])); - err[i] = 1.; - if (temp > NumTraits<Scalar>::epsilon() && temp < eps) - err[i] = (chkder_log10e * log(temp) - epslog) / epslog; - if (temp >= eps) - err[i] = 0.; - } - } -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h deleted file mode 100644 index 68260d1..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h +++ /dev/null @@ -1,70 +0,0 @@ -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void covar( - Matrix< Scalar, Dynamic, Dynamic > &r, - const VectorXi &ipvt, - Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon()) ) -{ - using std::abs; - typedef DenseIndex Index; - - /* Local variables */ - Index i, j, k, l, ii, jj; - bool sing; - Scalar temp; - - /* Function Body */ - const Index n = r.cols(); - const Scalar tolr = tol * abs(r(0,0)); - Matrix< Scalar, Dynamic, 1 > wa(n); - eigen_assert(ipvt.size()==n); - - /* form the inverse of r in the full upper triangle of r. */ - l = -1; - for (k = 0; k < n; ++k) - if (abs(r(k,k)) > tolr) { - r(k,k) = 1. / r(k,k); - for (j = 0; j <= k-1; ++j) { - temp = r(k,k) * r(j,k); - r(j,k) = 0.; - r.col(k).head(j+1) -= r.col(j).head(j+1) * temp; - } - l = k; - } - - /* form the full upper triangle of the inverse of (r transpose)*r */ - /* in the full upper triangle of r. */ - for (k = 0; k <= l; ++k) { - for (j = 0; j <= k-1; ++j) - r.col(j).head(j+1) += r.col(k).head(j+1) * r(j,k); - r.col(k).head(k+1) *= r(k,k); - } - - /* form the full lower triangle of the covariance matrix */ - /* in the strict lower triangle of r and in wa. */ - for (j = 0; j < n; ++j) { - jj = ipvt[j]; - sing = j > l; - for (i = 0; i <= j; ++i) { - if (sing) - r(i,j) = 0.; - ii = ipvt[i]; - if (ii > jj) - r(ii,jj) = r(i,j); - if (ii < jj) - r(jj,ii) = r(i,j); - } - wa[jj] = r(j,j); - } - - /* symmetrize the covariance matrix in r. 
*/ - r.topLeftCorner(n,n).template triangularView<StrictlyUpper>() = r.topLeftCorner(n,n).transpose(); - r.diagonal() = wa; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h deleted file mode 100644 index 80c5d27..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h +++ /dev/null @@ -1,107 +0,0 @@ -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void dogleg( - const Matrix< Scalar, Dynamic, Dynamic > &qrfac, - const Matrix< Scalar, Dynamic, 1 > &diag, - const Matrix< Scalar, Dynamic, 1 > &qtb, - Scalar delta, - Matrix< Scalar, Dynamic, 1 > &x) -{ - using std::abs; - using std::sqrt; - - typedef DenseIndex Index; - - /* Local variables */ - Index i, j; - Scalar sum, temp, alpha, bnorm; - Scalar gnorm, qnorm; - Scalar sgnorm; - - /* Function Body */ - const Scalar epsmch = NumTraits<Scalar>::epsilon(); - const Index n = qrfac.cols(); - eigen_assert(n==qtb.size()); - eigen_assert(n==x.size()); - eigen_assert(n==diag.size()); - Matrix< Scalar, Dynamic, 1 > wa1(n), wa2(n); - - /* first, calculate the gauss-newton direction. */ - for (j = n-1; j >=0; --j) { - temp = qrfac(j,j); - if (temp == 0.) { - temp = epsmch * qrfac.col(j).head(j+1).maxCoeff(); - if (temp == 0.) - temp = epsmch; - } - if (j==n-1) - x[j] = qtb[j] / temp; - else - x[j] = (qtb[j] - qrfac.row(j).tail(n-j-1).dot(x.tail(n-j-1))) / temp; - } - - /* test whether the gauss-newton direction is acceptable. */ - qnorm = diag.cwiseProduct(x).stableNorm(); - if (qnorm <= delta) - return; - - // TODO : this path is not tested by Eigen unit tests - - /* the gauss-newton direction is not acceptable. */ - /* next, calculate the scaled gradient direction. */ - - wa1.fill(0.); - for (j = 0; j < n; ++j) { - wa1.tail(n-j) += qrfac.row(j).tail(n-j) * qtb[j]; - wa1[j] /= diag[j]; - } - - /* calculate the norm of the scaled gradient and test for */ - /* the special case in which the scaled gradient is zero. */ - gnorm = wa1.stableNorm(); - sgnorm = 0.; - alpha = delta / qnorm; - if (gnorm == 0.) - goto algo_end; - - /* calculate the point along the scaled gradient */ - /* at which the quadratic is minimized. */ - wa1.array() /= (diag*gnorm).array(); - // TODO : once unit tests cover this part,: - // wa2 = qrfac.template triangularView<Upper>() * wa1; - for (j = 0; j < n; ++j) { - sum = 0.; - for (i = j; i < n; ++i) { - sum += qrfac(j,i) * wa1[i]; - } - wa2[j] = sum; - } - temp = wa2.stableNorm(); - sgnorm = gnorm / temp / temp; - - /* test whether the scaled gradient direction is acceptable. */ - alpha = 0.; - if (sgnorm >= delta) - goto algo_end; - - /* the scaled gradient direction is not acceptable. */ - /* finally, calculate the point along the dogleg */ - /* at which the quadratic is minimized. */ - bnorm = qtb.stableNorm(); - temp = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta); - temp = temp - delta / qnorm * numext::abs2(sgnorm / delta) + sqrt(numext::abs2(temp - delta / qnorm) + (1.-numext::abs2(delta / qnorm)) * (1.-numext::abs2(sgnorm / delta))); - alpha = delta / qnorm * (1. - numext::abs2(sgnorm / delta)) / temp; -algo_end: - - /* form appropriate convex combination of the gauss-newton */ - /* direction and the scaled gradient direction. 
*/ - temp = (1.-alpha) * (std::min)(sgnorm,delta); - x = temp * wa1 + alpha * x; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h deleted file mode 100644 index bb7cf26..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h +++ /dev/null @@ -1,79 +0,0 @@ -namespace Eigen { - -namespace internal { - -template<typename FunctorType, typename Scalar> -DenseIndex fdjac1( - const FunctorType &Functor, - Matrix< Scalar, Dynamic, 1 > &x, - Matrix< Scalar, Dynamic, 1 > &fvec, - Matrix< Scalar, Dynamic, Dynamic > &fjac, - DenseIndex ml, DenseIndex mu, - Scalar epsfcn) -{ - using std::sqrt; - using std::abs; - - typedef DenseIndex Index; - - /* Local variables */ - Scalar h; - Index j, k; - Scalar eps, temp; - Index msum; - int iflag; - Index start, length; - - /* Function Body */ - const Scalar epsmch = NumTraits<Scalar>::epsilon(); - const Index n = x.size(); - eigen_assert(fvec.size()==n); - Matrix< Scalar, Dynamic, 1 > wa1(n); - Matrix< Scalar, Dynamic, 1 > wa2(n); - - eps = sqrt((std::max)(epsfcn,epsmch)); - msum = ml + mu + 1; - if (msum >= n) { - /* computation of dense approximate jacobian. */ - for (j = 0; j < n; ++j) { - temp = x[j]; - h = eps * abs(temp); - if (h == 0.) - h = eps; - x[j] = temp + h; - iflag = Functor(x, wa1); - if (iflag < 0) - return iflag; - x[j] = temp; - fjac.col(j) = (wa1-fvec)/h; - } - - }else { - /* computation of banded approximate jacobian. */ - for (k = 0; k < msum; ++k) { - for (j = k; (msum<0) ? (j>n): (j<n); j += msum) { - wa2[j] = x[j]; - h = eps * abs(wa2[j]); - if (h == 0.) h = eps; - x[j] = wa2[j] + h; - } - iflag = Functor(x, wa1); - if (iflag < 0) - return iflag; - for (j = k; (msum<0) ? (j>n): (j<n); j += msum) { - x[j] = wa2[j]; - h = eps * abs(wa2[j]); - if (h == 0.) h = eps; - fjac.col(j).setZero(); - start = std::max<Index>(0,j-mu); - length = (std::min)(n-1, j+ml) - start + 1; - fjac.col(j).segment(start, length) = ( wa1.segment(start, length)-fvec.segment(start, length))/h; - } - } - } - return 0; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h deleted file mode 100644 index 4c17d4c..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h +++ /dev/null @@ -1,298 +0,0 @@ -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void lmpar( - Matrix< Scalar, Dynamic, Dynamic > &r, - const VectorXi &ipvt, - const Matrix< Scalar, Dynamic, 1 > &diag, - const Matrix< Scalar, Dynamic, 1 > &qtb, - Scalar delta, - Scalar &par, - Matrix< Scalar, Dynamic, 1 > &x) -{ - using std::abs; - using std::sqrt; - typedef DenseIndex Index; - - /* Local variables */ - Index i, j, l; - Scalar fp; - Scalar parc, parl; - Index iter; - Scalar temp, paru; - Scalar gnorm; - Scalar dxnorm; - - - /* Function Body */ - const Scalar dwarf = (std::numeric_limits<Scalar>::min)(); - const Index n = r.cols(); - eigen_assert(n==diag.size()); - eigen_assert(n==qtb.size()); - eigen_assert(n==x.size()); - - Matrix< Scalar, Dynamic, 1 > wa1, wa2; - - /* compute and store in x the gauss-newton direction. if the */ - /* jacobian is rank-deficient, obtain a least squares solution. */ - Index nsing = n-1; - wa1 = qtb; - for (j = 0; j < n; ++j) { - if (r(j,j) == 0. 
&& nsing == n-1) - nsing = j - 1; - if (nsing < n-1) - wa1[j] = 0.; - } - for (j = nsing; j>=0; --j) { - wa1[j] /= r(j,j); - temp = wa1[j]; - for (i = 0; i < j ; ++i) - wa1[i] -= r(i,j) * temp; - } - - for (j = 0; j < n; ++j) - x[ipvt[j]] = wa1[j]; - - /* initialize the iteration counter. */ - /* evaluate the function at the origin, and test */ - /* for acceptance of the gauss-newton direction. */ - iter = 0; - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - fp = dxnorm - delta; - if (fp <= Scalar(0.1) * delta) { - par = 0; - return; - } - - /* if the jacobian is not rank deficient, the newton */ - /* step provides a lower bound, parl, for the zero of */ - /* the function. otherwise set this bound to zero. */ - parl = 0.; - if (nsing >= n-1) { - for (j = 0; j < n; ++j) { - l = ipvt[j]; - wa1[j] = diag[l] * (wa2[l] / dxnorm); - } - // it's actually a triangularView.solveInplace(), though in a weird - // way: - for (j = 0; j < n; ++j) { - Scalar sum = 0.; - for (i = 0; i < j; ++i) - sum += r(i,j) * wa1[i]; - wa1[j] = (wa1[j] - sum) / r(j,j); - } - temp = wa1.blueNorm(); - parl = fp / delta / temp / temp; - } - - /* calculate an upper bound, paru, for the zero of the function. */ - for (j = 0; j < n; ++j) - wa1[j] = r.col(j).head(j+1).dot(qtb.head(j+1)) / diag[ipvt[j]]; - - gnorm = wa1.stableNorm(); - paru = gnorm / delta; - if (paru == 0.) - paru = dwarf / (std::min)(delta,Scalar(0.1)); - - /* if the input par lies outside of the interval (parl,paru), */ - /* set par to the closer endpoint. */ - par = (std::max)(par,parl); - par = (std::min)(par,paru); - if (par == 0.) - par = gnorm / dxnorm; - - /* beginning of an iteration. */ - while (true) { - ++iter; - - /* evaluate the function at the current value of par. */ - if (par == 0.) - par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ - wa1 = sqrt(par)* diag; - - Matrix< Scalar, Dynamic, 1 > sdiag(n); - qrsolv<Scalar>(r, ipvt, wa1, qtb, x, sdiag); - - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - temp = fp; - fp = dxnorm - delta; - - /* if the function is small enough, accept the current value */ - /* of par. also test for the exceptional cases where parl */ - /* is zero or the number of iterations has reached 10. */ - if (abs(fp) <= Scalar(0.1) * delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) - break; - - /* compute the newton correction. */ - for (j = 0; j < n; ++j) { - l = ipvt[j]; - wa1[j] = diag[l] * (wa2[l] / dxnorm); - } - for (j = 0; j < n; ++j) { - wa1[j] /= sdiag[j]; - temp = wa1[j]; - for (i = j+1; i < n; ++i) - wa1[i] -= r(i,j) * temp; - } - temp = wa1.blueNorm(); - parc = fp / delta / temp / temp; - - /* depending on the sign of the function, update parl or paru. */ - if (fp > 0.) - parl = (std::max)(parl,par); - if (fp < 0.) - paru = (std::min)(paru,par); - - /* compute an improved estimate for par. */ - /* Computing MAX */ - par = (std::max)(parl,par+parc); - - /* end of an iteration. */ - } - - /* termination. 
*/ - if (iter == 0) - par = 0.; - return; -} - -template <typename Scalar> -void lmpar2( - const ColPivHouseholderQR<Matrix< Scalar, Dynamic, Dynamic> > &qr, - const Matrix< Scalar, Dynamic, 1 > &diag, - const Matrix< Scalar, Dynamic, 1 > &qtb, - Scalar delta, - Scalar &par, - Matrix< Scalar, Dynamic, 1 > &x) - -{ - using std::sqrt; - using std::abs; - typedef DenseIndex Index; - - /* Local variables */ - Index j; - Scalar fp; - Scalar parc, parl; - Index iter; - Scalar temp, paru; - Scalar gnorm; - Scalar dxnorm; - - - /* Function Body */ - const Scalar dwarf = (std::numeric_limits<Scalar>::min)(); - const Index n = qr.matrixQR().cols(); - eigen_assert(n==diag.size()); - eigen_assert(n==qtb.size()); - - Matrix< Scalar, Dynamic, 1 > wa1, wa2; - - /* compute and store in x the gauss-newton direction. if the */ - /* jacobian is rank-deficient, obtain a least squares solution. */ - -// const Index rank = qr.nonzeroPivots(); // exactly double(0.) - const Index rank = qr.rank(); // use a threshold - wa1 = qtb; - wa1.tail(n-rank).setZero(); - qr.matrixQR().topLeftCorner(rank, rank).template triangularView<Upper>().solveInPlace(wa1.head(rank)); - - x = qr.colsPermutation()*wa1; - - /* initialize the iteration counter. */ - /* evaluate the function at the origin, and test */ - /* for acceptance of the gauss-newton direction. */ - iter = 0; - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - fp = dxnorm - delta; - if (fp <= Scalar(0.1) * delta) { - par = 0; - return; - } - - /* if the jacobian is not rank deficient, the newton */ - /* step provides a lower bound, parl, for the zero of */ - /* the function. otherwise set this bound to zero. */ - parl = 0.; - if (rank==n) { - wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2)/dxnorm; - qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView<Lower>().solveInPlace(wa1); - temp = wa1.blueNorm(); - parl = fp / delta / temp / temp; - } - - /* calculate an upper bound, paru, for the zero of the function. */ - for (j = 0; j < n; ++j) - wa1[j] = qr.matrixQR().col(j).head(j+1).dot(qtb.head(j+1)) / diag[qr.colsPermutation().indices()(j)]; - - gnorm = wa1.stableNorm(); - paru = gnorm / delta; - if (paru == 0.) - paru = dwarf / (std::min)(delta,Scalar(0.1)); - - /* if the input par lies outside of the interval (parl,paru), */ - /* set par to the closer endpoint. */ - par = (std::max)(par,parl); - par = (std::min)(par,paru); - if (par == 0.) - par = gnorm / dxnorm; - - /* beginning of an iteration. */ - Matrix< Scalar, Dynamic, Dynamic > s = qr.matrixQR(); - while (true) { - ++iter; - - /* evaluate the function at the current value of par. */ - if (par == 0.) - par = (std::max)(dwarf,Scalar(.001) * paru); /* Computing MAX */ - wa1 = sqrt(par)* diag; - - Matrix< Scalar, Dynamic, 1 > sdiag(n); - qrsolv<Scalar>(s, qr.colsPermutation().indices(), wa1, qtb, x, sdiag); - - wa2 = diag.cwiseProduct(x); - dxnorm = wa2.blueNorm(); - temp = fp; - fp = dxnorm - delta; - - /* if the function is small enough, accept the current value */ - /* of par. also test for the exceptional cases where parl */ - /* is zero or the number of iterations has reached 10. */ - if (abs(fp) <= Scalar(0.1) * delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) - break; - - /* compute the newton correction. 
*/ - wa1 = qr.colsPermutation().inverse() * diag.cwiseProduct(wa2/dxnorm); - // we could almost use this here, but the diagonal is outside qr, in sdiag[] - // qr.matrixQR().topLeftCorner(n, n).transpose().template triangularView<Lower>().solveInPlace(wa1); - for (j = 0; j < n; ++j) { - wa1[j] /= sdiag[j]; - temp = wa1[j]; - for (Index i = j+1; i < n; ++i) - wa1[i] -= s(i,j) * temp; - } - temp = wa1.blueNorm(); - parc = fp / delta / temp / temp; - - /* depending on the sign of the function, update parl or paru. */ - if (fp > 0.) - parl = (std::max)(parl,par); - if (fp < 0.) - paru = (std::min)(paru,par); - - /* compute an improved estimate for par. */ - par = (std::max)(parl,par+parc); - } - if (iter == 0) - par = 0.; - return; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h deleted file mode 100644 index feafd62..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h +++ /dev/null @@ -1,91 +0,0 @@ -namespace Eigen { - -namespace internal { - -// TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt -template <typename Scalar> -void qrsolv( - Matrix< Scalar, Dynamic, Dynamic > &s, - // TODO : use a PermutationMatrix once lmpar is no more: - const VectorXi &ipvt, - const Matrix< Scalar, Dynamic, 1 > &diag, - const Matrix< Scalar, Dynamic, 1 > &qtb, - Matrix< Scalar, Dynamic, 1 > &x, - Matrix< Scalar, Dynamic, 1 > &sdiag) - -{ - typedef DenseIndex Index; - - /* Local variables */ - Index i, j, k, l; - Scalar temp; - Index n = s.cols(); - Matrix< Scalar, Dynamic, 1 > wa(n); - JacobiRotation<Scalar> givens; - - /* Function Body */ - // the following will only change the lower triangular part of s, including - // the diagonal, though the diagonal is restored afterward - - /* copy r and (q transpose)*b to preserve input and initialize s. */ - /* in particular, save the diagonal elements of r in x. */ - x = s.diagonal(); - wa = qtb; - - s.topLeftCorner(n,n).template triangularView<StrictlyLower>() = s.topLeftCorner(n,n).transpose(); - - /* eliminate the diagonal matrix d using a givens rotation. */ - for (j = 0; j < n; ++j) { - - /* prepare the row of d to be eliminated, locating the */ - /* diagonal element using p from the qr factorization. */ - l = ipvt[j]; - if (diag[l] == 0.) - break; - sdiag.tail(n-j).setZero(); - sdiag[j] = diag[l]; - - /* the transformations to eliminate the row of d */ - /* modify only a single element of (q transpose)*b */ - /* beyond the first n, which is initially zero. */ - Scalar qtbpj = 0.; - for (k = j; k < n; ++k) { - /* determine a givens rotation which eliminates the */ - /* appropriate element in the current row of d. */ - givens.makeGivens(-s(k,k), sdiag[k]); - - /* compute the modified diagonal element of r and */ - /* the modified element of ((q transpose)*b,0). */ - s(k,k) = givens.c() * s(k,k) + givens.s() * sdiag[k]; - temp = givens.c() * wa[k] + givens.s() * qtbpj; - qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; - wa[k] = temp; - - /* accumulate the tranformation in the row of s. */ - for (i = k+1; i<n; ++i) { - temp = givens.c() * s(i,k) + givens.s() * sdiag[i]; - sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i]; - s(i,k) = temp; - } - } - } - - /* solve the triangular system for z. if the system is */ - /* singular, then obtain a least squares solution. 
*/ - Index nsing; - for(nsing=0; nsing<n && sdiag[nsing]!=0; nsing++) {} - - wa.tail(n-nsing).setZero(); - s.topLeftCorner(nsing, nsing).transpose().template triangularView<Upper>().solveInPlace(wa.head(nsing)); - - // restore - sdiag = s.diagonal(); - s.diagonal() = x; - - /* permute the components of z back to components of x. */ - for (j = 0; j < n; ++j) x[ipvt[j]] = wa[j]; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h deleted file mode 100644 index 36ff700..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h +++ /dev/null @@ -1,30 +0,0 @@ -namespace Eigen { - -namespace internal { - -// TODO : move this to GivensQR once there's such a thing in Eigen - -template <typename Scalar> -void r1mpyq(DenseIndex m, DenseIndex n, Scalar *a, const std::vector<JacobiRotation<Scalar> > &v_givens, const std::vector<JacobiRotation<Scalar> > &w_givens) -{ - typedef DenseIndex Index; - - /* apply the first set of givens rotations to a. */ - for (Index j = n-2; j>=0; --j) - for (Index i = 0; i<m; ++i) { - Scalar temp = v_givens[j].c() * a[i+m*j] - v_givens[j].s() * a[i+m*(n-1)]; - a[i+m*(n-1)] = v_givens[j].s() * a[i+m*j] + v_givens[j].c() * a[i+m*(n-1)]; - a[i+m*j] = temp; - } - /* apply the second set of givens rotations to a. */ - for (Index j = 0; j<n-1; ++j) - for (Index i = 0; i<m; ++i) { - Scalar temp = w_givens[j].c() * a[i+m*j] + w_givens[j].s() * a[i+m*(n-1)]; - a[i+m*(n-1)] = -w_givens[j].s() * a[i+m*j] + w_givens[j].c() * a[i+m*(n-1)]; - a[i+m*j] = temp; - } -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h deleted file mode 100644 index f287660..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h +++ /dev/null @@ -1,99 +0,0 @@ -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void r1updt( - Matrix< Scalar, Dynamic, Dynamic > &s, - const Matrix< Scalar, Dynamic, 1> &u, - std::vector<JacobiRotation<Scalar> > &v_givens, - std::vector<JacobiRotation<Scalar> > &w_givens, - Matrix< Scalar, Dynamic, 1> &v, - Matrix< Scalar, Dynamic, 1> &w, - bool *sing) -{ - typedef DenseIndex Index; - const JacobiRotation<Scalar> IdentityRotation = JacobiRotation<Scalar>(1,0); - - /* Local variables */ - const Index m = s.rows(); - const Index n = s.cols(); - Index i, j=1; - Scalar temp; - JacobiRotation<Scalar> givens; - - // r1updt had a broader usecase, but we dont use it here. And, more - // importantly, we can not test it. - eigen_assert(m==n); - eigen_assert(u.size()==m); - eigen_assert(v.size()==n); - eigen_assert(w.size()==n); - - /* move the nontrivial part of the last column of s into w. */ - w[n-1] = s(n-1,n-1); - - /* rotate the vector v into a multiple of the n-th unit vector */ - /* in such a way that a spike is introduced into w. */ - for (j=n-2; j>=0; --j) { - w[j] = 0.; - if (v[j] != 0.) { - /* determine a givens rotation which eliminates the */ - /* j-th element of v. */ - givens.makeGivens(-v[n-1], v[j]); - - /* apply the transformation to v and store the information */ - /* necessary to recover the givens rotation. */ - v[n-1] = givens.s() * v[j] + givens.c() * v[n-1]; - v_givens[j] = givens; - - /* apply the transformation to s and extend the spike in w. 
*/ - for (i = j; i < m; ++i) { - temp = givens.c() * s(j,i) - givens.s() * w[i]; - w[i] = givens.s() * s(j,i) + givens.c() * w[i]; - s(j,i) = temp; - } - } else - v_givens[j] = IdentityRotation; - } - - /* add the spike from the rank 1 update to w. */ - w += v[n-1] * u; - - /* eliminate the spike. */ - *sing = false; - for (j = 0; j < n-1; ++j) { - if (w[j] != 0.) { - /* determine a givens rotation which eliminates the */ - /* j-th element of the spike. */ - givens.makeGivens(-s(j,j), w[j]); - - /* apply the transformation to s and reduce the spike in w. */ - for (i = j; i < m; ++i) { - temp = givens.c() * s(j,i) + givens.s() * w[i]; - w[i] = -givens.s() * s(j,i) + givens.c() * w[i]; - s(j,i) = temp; - } - - /* store the information necessary to recover the */ - /* givens rotation. */ - w_givens[j] = givens; - } else - v_givens[j] = IdentityRotation; - - /* test for zero diagonal elements in the output s. */ - if (s(j,j) == 0.) { - *sing = true; - } - } - /* move w back into the last column of the output s. */ - s(n-1,n-1) = w[n-1]; - - if (s(j,j) == 0.) { - *sing = true; - } - return; -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h b/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h deleted file mode 100644 index 6ebf856..0000000 --- a/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h +++ /dev/null @@ -1,49 +0,0 @@ -namespace Eigen { - -namespace internal { - -template <typename Scalar> -void rwupdt( - Matrix< Scalar, Dynamic, Dynamic > &r, - const Matrix< Scalar, Dynamic, 1> &w, - Matrix< Scalar, Dynamic, 1> &b, - Scalar alpha) -{ - typedef DenseIndex Index; - - const Index n = r.cols(); - eigen_assert(r.rows()>=n); - std::vector<JacobiRotation<Scalar> > givens(n); - - /* Local variables */ - Scalar temp, rowj; - - /* Function Body */ - for (Index j = 0; j < n; ++j) { - rowj = w[j]; - - /* apply the previous transformations to */ - /* r(i,j), i=0,1,...,j-1, and to w(j). */ - for (Index i = 0; i < j; ++i) { - temp = givens[i].c() * r(i,j) + givens[i].s() * rowj; - rowj = -givens[i].s() * r(i,j) + givens[i].c() * rowj; - r(i,j) = temp; - } - - /* determine a givens rotation which eliminates w(j). */ - givens[j].makeGivens(-r(j,j), rowj); - - if (rowj == 0.) - continue; // givens[j] is identity - - /* apply the current transformation to r(j,j), b(j), and alpha. */ - r(j,j) = givens[j].c() * r(j,j) + givens[j].s() * rowj; - temp = givens[j].c() * b[j] + givens[j].s() * alpha; - alpha = -givens[j].s() * b[j] + givens[j].c() * alpha; - b[j] = temp; - } -} - -} // end namespace internal - -} // end namespace Eigen diff --git a/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h b/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h deleted file mode 100644 index ea5d8bc..0000000 --- a/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h +++ /dev/null @@ -1,130 +0,0 @@ -// -*- coding: utf-8 -// vim: set fileencoding=utf-8 - -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Thomas Capricelli <orzel@freehackers.org> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
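For reference, the rwupdt routine shown above is the classic MINPACK row update of a QR factorization: given the upper triangular factor R of some matrix A, it folds an extra row w^T into R with one Givens rotation per column, while b and alpha carry the matching right-hand-side bookkeeping. A hedged, illustrative sketch, assuming the NonLinearOptimization module above is still installed; the sizes and values are made up:

#include <Eigen/Dense>
#include <unsupported/Eigen/NonLinearOptimization>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
  // upper triangular R factor of A
  Eigen::MatrixXd R = A.householderQr().matrixQR().topRows(3).triangularView<Eigen::Upper>();
  Eigen::VectorXd w = Eigen::VectorXd::Random(3);  // the row being appended to A
  Eigen::VectorXd b = Eigen::VectorXd::Zero(3);    // right-hand-side bookkeeping, unused in this sketch
  Eigen::internal::rwupdt<double>(R, w, b, 0.0);
  // R now equals, up to signs, the R factor of the 6x3 matrix [A; w^T]
  std::cout << R << "\n";
  return 0;
}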
- -#ifndef EIGEN_NUMERICAL_DIFF_H -#define EIGEN_NUMERICAL_DIFF_H - -namespace Eigen { - -enum NumericalDiffMode { - Forward, - Central -}; - - -/** - * This class allows you to add a method df() to your functor, which will - * use numerical differentiation to compute an approximate of the - * derivative for the functor. Of course, if you have an analytical form - * for the derivative, you should rather implement df() by yourself. - * - * More information on - * http://en.wikipedia.org/wiki/Numerical_differentiation - * - * Currently only "Forward" and "Central" scheme are implemented. - */ -template<typename _Functor, NumericalDiffMode mode=Forward> -class NumericalDiff : public _Functor -{ -public: - typedef _Functor Functor; - typedef typename Functor::Scalar Scalar; - typedef typename Functor::InputType InputType; - typedef typename Functor::ValueType ValueType; - typedef typename Functor::JacobianType JacobianType; - - NumericalDiff(Scalar _epsfcn=0.) : Functor(), epsfcn(_epsfcn) {} - NumericalDiff(const Functor& f, Scalar _epsfcn=0.) : Functor(f), epsfcn(_epsfcn) {} - - // forward constructors - template<typename T0> - NumericalDiff(const T0& a0) : Functor(a0), epsfcn(0) {} - template<typename T0, typename T1> - NumericalDiff(const T0& a0, const T1& a1) : Functor(a0, a1), epsfcn(0) {} - template<typename T0, typename T1, typename T2> - NumericalDiff(const T0& a0, const T1& a1, const T2& a2) : Functor(a0, a1, a2), epsfcn(0) {} - - enum { - InputsAtCompileTime = Functor::InputsAtCompileTime, - ValuesAtCompileTime = Functor::ValuesAtCompileTime - }; - - /** - * return the number of evaluation of functor - */ - int df(const InputType& _x, JacobianType &jac) const - { - using std::sqrt; - using std::abs; - /* Local variables */ - Scalar h; - int nfev=0; - const typename InputType::Index n = _x.size(); - const Scalar eps = sqrt(((std::max)(epsfcn,NumTraits<Scalar>::epsilon() ))); - ValueType val1, val2; - InputType x = _x; - // TODO : we should do this only if the size is not already known - val1.resize(Functor::values()); - val2.resize(Functor::values()); - - // initialization - switch(mode) { - case Forward: - // compute f(x) - Functor::operator()(x, val1); nfev++; - break; - case Central: - // do nothing - break; - default: - eigen_assert(false); - }; - - // Function Body - for (int j = 0; j < n; ++j) { - h = eps * abs(x[j]); - if (h == 0.) { - h = eps; - } - switch(mode) { - case Forward: - x[j] += h; - Functor::operator()(x, val2); - nfev++; - x[j] = _x[j]; - jac.col(j) = (val2-val1)/h; - break; - case Central: - x[j] += h; - Functor::operator()(x, val2); nfev++; - x[j] -= 2*h; - Functor::operator()(x, val1); nfev++; - x[j] = _x[j]; - jac.col(j) = (val2-val1)/(2*h); - break; - default: - eigen_assert(false); - }; - } - return nfev; - } -private: - Scalar epsfcn; - - NumericalDiff& operator=(const NumericalDiff&); -}; - -} // end namespace Eigen - -//vim: ai ts=4 sts=4 et sw=4 -#endif // EIGEN_NUMERICAL_DIFF_H - diff --git a/eigen/unsupported/Eigen/src/Polynomials/Companion.h b/eigen/unsupported/Eigen/src/Polynomials/Companion.h deleted file mode 100644 index b515c29..0000000 --- a/eigen/unsupported/Eigen/src/Polynomials/Companion.h +++ /dev/null @@ -1,276 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Manuel Yguel <manuel.yguel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COMPANION_H -#define EIGEN_COMPANION_H - -// This file requires the user to include -// * Eigen/Core -// * Eigen/src/PolynomialSolver.h - -namespace Eigen { - -namespace internal { - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -template <typename T> -T radix(){ return 2; } - -template <typename T> -T radix2(){ return radix<T>()*radix<T>(); } - -template<int Size> -struct decrement_if_fixed_size -{ - enum { - ret = (Size == Dynamic) ? Dynamic : Size-1 }; -}; - -#endif - -template< typename _Scalar, int _Deg > -class companion -{ - public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? Dynamic : _Deg) - - enum { - Deg = _Deg, - Deg_1=decrement_if_fixed_size<Deg>::ret - }; - - typedef _Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef Matrix<Scalar, Deg, 1> RightColumn; - //typedef DiagonalMatrix< Scalar, Deg_1, Deg_1 > BottomLeftDiagonal; - typedef Matrix<Scalar, Deg_1, 1> BottomLeftDiagonal; - - typedef Matrix<Scalar, Deg, Deg> DenseCompanionMatrixType; - typedef Matrix< Scalar, _Deg, Deg_1 > LeftBlock; - typedef Matrix< Scalar, Deg_1, Deg_1 > BottomLeftBlock; - typedef Matrix< Scalar, 1, Deg_1 > LeftBlockFirstRow; - - typedef DenseIndex Index; - - public: - EIGEN_STRONG_INLINE const _Scalar operator()(Index row, Index col ) const - { - if( m_bl_diag.rows() > col ) - { - if( 0 < row ){ return m_bl_diag[col]; } - else{ return 0; } - } - else{ return m_monic[row]; } - } - - public: - template<typename VectorType> - void setPolynomial( const VectorType& poly ) - { - const Index deg = poly.size()-1; - m_monic = -1/poly[deg] * poly.head(deg); - //m_bl_diag.setIdentity( deg-1 ); - m_bl_diag.setOnes(deg-1); - } - - template<typename VectorType> - companion( const VectorType& poly ){ - setPolynomial( poly ); } - - public: - DenseCompanionMatrixType denseMatrix() const - { - const Index deg = m_monic.size(); - const Index deg_1 = deg-1; - DenseCompanionMatrixType companion(deg,deg); - companion << - ( LeftBlock(deg,deg_1) - << LeftBlockFirstRow::Zero(1,deg_1), - BottomLeftBlock::Identity(deg-1,deg-1)*m_bl_diag.asDiagonal() ).finished() - , m_monic; - return companion; - } - - - - protected: - /** Helper function for the balancing algorithm. - * \returns true if the row and the column, having colNorm and rowNorm - * as norms, are balanced, false otherwise. - * colB and rowB are repectively the multipliers for - * the column and the row in order to balance them. - * */ - bool balanced( Scalar colNorm, Scalar rowNorm, - bool& isBalanced, Scalar& colB, Scalar& rowB ); - - /** Helper function for the balancing algorithm. - * \returns true if the row and the column, having colNorm and rowNorm - * as norms, are balanced, false otherwise. - * colB and rowB are repectively the multipliers for - * the column and the row in order to balance them. - * */ - bool balancedR( Scalar colNorm, Scalar rowNorm, - bool& isBalanced, Scalar& colB, Scalar& rowB ); - - public: - /** - * Balancing algorithm from B. N. PARLETT and C. REINSCH (1969) - * "Balancing a matrix for calculation of eigenvalues and eigenvectors" - * adapted to the case of companion matrices. - * A matrix with non zero row and non zero column is balanced - * for a certain norm if the i-th row and the i-th column - * have same norm for all i. 
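The companion class above packs, for a polynomial p given by its coefficients, the companion matrix whose eigenvalues are the roots of p. The same construction is easy to reproduce with plain dense Eigen, which is useful for checking the convention used here: ones on the subdiagonal and -c_i/c_n in the last column. A small self-contained illustration (the coefficient values are made up and the internal class itself is not used):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Vector4d poly(-6.0, 11.0, -6.0, 1.0);       // p(x) = -6 + 11x - 6x^2 + x^3 = (x-1)(x-2)(x-3)
  const int deg = poly.size() - 1;
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(deg, deg);
  C.block(1, 0, deg - 1, deg - 1).setIdentity();     // ones on the subdiagonal
  C.col(deg - 1) = -poly.head(deg) / poly[deg];      // monic right column, as in setPolynomial()
  std::cout << Eigen::EigenSolver<Eigen::MatrixXd>(C).eigenvalues().transpose() << "\n";  // 1, 2, 3
  return 0;
}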
- */ - void balance(); - - protected: - RightColumn m_monic; - BottomLeftDiagonal m_bl_diag; -}; - - - -template< typename _Scalar, int _Deg > -inline -bool companion<_Scalar,_Deg>::balanced( Scalar colNorm, Scalar rowNorm, - bool& isBalanced, Scalar& colB, Scalar& rowB ) -{ - if( Scalar(0) == colNorm || Scalar(0) == rowNorm ){ return true; } - else - { - //To find the balancing coefficients, if the radix is 2, - //one finds \f$ \sigma \f$ such that - // \f$ 2^{2\sigma-1} < rowNorm / colNorm \le 2^{2\sigma+1} \f$ - // then the balancing coefficient for the row is \f$ 1/2^{\sigma} \f$ - // and the balancing coefficient for the column is \f$ 2^{\sigma} \f$ - rowB = rowNorm / radix<Scalar>(); - colB = Scalar(1); - const Scalar s = colNorm + rowNorm; - - while (colNorm < rowB) - { - colB *= radix<Scalar>(); - colNorm *= radix2<Scalar>(); - } - - rowB = rowNorm * radix<Scalar>(); - - while (colNorm >= rowB) - { - colB /= radix<Scalar>(); - colNorm /= radix2<Scalar>(); - } - - //This line is used to avoid insubstantial balancing - if ((rowNorm + colNorm) < Scalar(0.95) * s * colB) - { - isBalanced = false; - rowB = Scalar(1) / colB; - return false; - } - else{ - return true; } - } -} - -template< typename _Scalar, int _Deg > -inline -bool companion<_Scalar,_Deg>::balancedR( Scalar colNorm, Scalar rowNorm, - bool& isBalanced, Scalar& colB, Scalar& rowB ) -{ - if( Scalar(0) == colNorm || Scalar(0) == rowNorm ){ return true; } - else - { - /** - * Set the norm of the column and the row to the geometric mean - * of the row and column norm - */ - const _Scalar q = colNorm/rowNorm; - if( !isApprox( q, _Scalar(1) ) ) - { - rowB = sqrt( colNorm/rowNorm ); - colB = Scalar(1)/rowB; - - isBalanced = false; - return false; - } - else{ - return true; } - } -} - - -template< typename _Scalar, int _Deg > -void companion<_Scalar,_Deg>::balance() -{ - using std::abs; - EIGEN_STATIC_ASSERT( Deg == Dynamic || 1 < Deg, YOU_MADE_A_PROGRAMMING_MISTAKE ); - const Index deg = m_monic.size(); - const Index deg_1 = deg-1; - - bool hasConverged=false; - while( !hasConverged ) - { - hasConverged = true; - Scalar colNorm,rowNorm; - Scalar colB,rowB; - - //First row, first column excluding the diagonal - //============================================== - colNorm = abs(m_bl_diag[0]); - rowNorm = abs(m_monic[0]); - - //Compute balancing of the row and the column - if( !balanced( colNorm, rowNorm, hasConverged, colB, rowB ) ) - { - m_bl_diag[0] *= colB; - m_monic[0] *= rowB; - } - - //Middle rows and columns excluding the diagonal - //============================================== - for( Index i=1; i<deg_1; ++i ) - { - // column norm, excluding the diagonal - colNorm = abs(m_bl_diag[i]); - - // row norm, excluding the diagonal - rowNorm = abs(m_bl_diag[i-1]) + abs(m_monic[i]); - - //Compute balancing of the row and the column - if( !balanced( colNorm, rowNorm, hasConverged, colB, rowB ) ) - { - m_bl_diag[i] *= colB; - m_bl_diag[i-1] *= rowB; - m_monic[i] *= rowB; - } - } - - //Last row, last column excluding the diagonal - //============================================ - const Index ebl = m_bl_diag.size()-1; - VectorBlock<RightColumn,Deg_1> headMonic( m_monic, 0, deg_1 ); - colNorm = headMonic.array().abs().sum(); - rowNorm = abs( m_bl_diag[ebl] ); - - //Compute balancing of the row and the column - if( !balanced( colNorm, rowNorm, hasConverged, colB, rowB ) ) - { - headMonic *= colB; - m_bl_diag[ebl] *= rowB; - } - } -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COMPANION_H diff --git 
a/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h deleted file mode 100644 index 03198ec..0000000 --- a/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h +++ /dev/null @@ -1,406 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Manuel Yguel <manuel.yguel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_POLYNOMIAL_SOLVER_H -#define EIGEN_POLYNOMIAL_SOLVER_H - -namespace Eigen { - -/** \ingroup Polynomials_Module - * \class PolynomialSolverBase. - * - * \brief Defined to be inherited by polynomial solvers: it provides - * convenient methods such as - * - real roots, - * - greatest, smallest complex roots, - * - real roots with greatest, smallest absolute real value, - * - greatest, smallest real roots. - * - * It stores the set of roots as a vector of complexes. - * - */ -template< typename _Scalar, int _Deg > -class PolynomialSolverBase -{ - public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? Dynamic : _Deg) - - typedef _Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef std::complex<RealScalar> RootType; - typedef Matrix<RootType,_Deg,1> RootsType; - - typedef DenseIndex Index; - - protected: - template< typename OtherPolynomial > - inline void setPolynomial( const OtherPolynomial& poly ){ - m_roots.resize(poly.size()-1); } - - public: - template< typename OtherPolynomial > - inline PolynomialSolverBase( const OtherPolynomial& poly ){ - setPolynomial( poly() ); } - - inline PolynomialSolverBase(){} - - public: - /** \returns the complex roots of the polynomial */ - inline const RootsType& roots() const { return m_roots; } - - public: - /** Clear and fills the back insertion sequence with the real roots of the polynomial - * i.e. the real part of the complex roots that have an imaginary part which - * absolute value is smaller than absImaginaryThreshold. - * absImaginaryThreshold takes the dummy_precision associated - * with the _Scalar template parameter of the PolynomialSolver class as the default value. - * - * \param[out] bi_seq : the back insertion sequence (stl concept) - * \param[in] absImaginaryThreshold : the maximum bound of the imaginary part of a complex - * number that is considered as real. - * */ - template<typename Stl_back_insertion_sequence> - inline void realRoots( Stl_back_insertion_sequence& bi_seq, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - using std::abs; - bi_seq.clear(); - for(Index i=0; i<m_roots.size(); ++i ) - { - if( abs( m_roots[i].imag() ) < absImaginaryThreshold ){ - bi_seq.push_back( m_roots[i].real() ); } - } - } - - protected: - template<typename squaredNormBinaryPredicate> - inline const RootType& selectComplexRoot_withRespectToNorm( squaredNormBinaryPredicate& pred ) const - { - Index res=0; - RealScalar norm2 = numext::abs2( m_roots[0] ); - for( Index i=1; i<m_roots.size(); ++i ) - { - const RealScalar currNorm2 = numext::abs2( m_roots[i] ); - if( pred( currNorm2, norm2 ) ){ - res=i; norm2=currNorm2; } - } - return m_roots[res]; - } - - public: - /** - * \returns the complex root with greatest norm. 
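In practice the roots() and realRoots() accessors documented above are reached through PolynomialSolver, which is defined later in this file. A minimal usage sketch, assuming the public header <unsupported/Eigen/Polynomials> is installed; the coefficients are illustrative and ordered by increasing degree, as in the doc comments:

#include <unsupported/Eigen/Polynomials>
#include <iostream>
#include <vector>

int main() {
  Eigen::VectorXd coeffs(4);
  coeffs << -6.0, 11.0, -6.0, 1.0;                       // (x-1)(x-2)(x-3)
  Eigen::PolynomialSolver<double, Eigen::Dynamic> solver(coeffs);
  std::cout << solver.roots().transpose() << "\n";       // the three complex roots
  std::vector<double> re;
  solver.realRoots(re);                                  // back insertion sequence, as documented
  for (double r : re) std::cout << r << " ";             // 1 2 3, in some order
  std::cout << "\n";
  return 0;
}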
- */ - inline const RootType& greatestRoot() const - { - std::greater<Scalar> greater; - return selectComplexRoot_withRespectToNorm( greater ); - } - - /** - * \returns the complex root with smallest norm. - */ - inline const RootType& smallestRoot() const - { - std::less<Scalar> less; - return selectComplexRoot_withRespectToNorm( less ); - } - - protected: - template<typename squaredRealPartBinaryPredicate> - inline const RealScalar& selectRealRoot_withRespectToAbsRealPart( - squaredRealPartBinaryPredicate& pred, - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - using std::abs; - hasArealRoot = false; - Index res=0; - RealScalar abs2(0); - - for( Index i=0; i<m_roots.size(); ++i ) - { - if( abs( m_roots[i].imag() ) < absImaginaryThreshold ) - { - if( !hasArealRoot ) - { - hasArealRoot = true; - res = i; - abs2 = m_roots[i].real() * m_roots[i].real(); - } - else - { - const RealScalar currAbs2 = m_roots[i].real() * m_roots[i].real(); - if( pred( currAbs2, abs2 ) ) - { - abs2 = currAbs2; - res = i; - } - } - } - else - { - if( abs( m_roots[i].imag() ) < abs( m_roots[res].imag() ) ){ - res = i; } - } - } - return numext::real_ref(m_roots[res]); - } - - - template<typename RealPartBinaryPredicate> - inline const RealScalar& selectRealRoot_withRespectToRealPart( - RealPartBinaryPredicate& pred, - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - using std::abs; - hasArealRoot = false; - Index res=0; - RealScalar val(0); - - for( Index i=0; i<m_roots.size(); ++i ) - { - if( abs( m_roots[i].imag() ) < absImaginaryThreshold ) - { - if( !hasArealRoot ) - { - hasArealRoot = true; - res = i; - val = m_roots[i].real(); - } - else - { - const RealScalar curr = m_roots[i].real(); - if( pred( curr, val ) ) - { - val = curr; - res = i; - } - } - } - else - { - if( abs( m_roots[i].imag() ) < abs( m_roots[res].imag() ) ){ - res = i; } - } - } - return numext::real_ref(m_roots[res]); - } - - public: - /** - * \returns a real root with greatest absolute magnitude. - * A real root is defined as the real part of a complex root with absolute imaginary - * part smallest than absImaginaryThreshold. - * absImaginaryThreshold takes the dummy_precision associated - * with the _Scalar template parameter of the PolynomialSolver class as the default value. - * If no real root is found the boolean hasArealRoot is set to false and the real part of - * the root with smallest absolute imaginary part is returned instead. - * - * \param[out] hasArealRoot : boolean true if a real root is found according to the - * absImaginaryThreshold criterion, false otherwise. - * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide - * whether or not a root is real. - */ - inline const RealScalar& absGreatestRealRoot( - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - std::greater<Scalar> greater; - return selectRealRoot_withRespectToAbsRealPart( greater, hasArealRoot, absImaginaryThreshold ); - } - - - /** - * \returns a real root with smallest absolute magnitude. - * A real root is defined as the real part of a complex root with absolute imaginary - * part smallest than absImaginaryThreshold. - * absImaginaryThreshold takes the dummy_precision associated - * with the _Scalar template parameter of the PolynomialSolver class as the default value. 
- * If no real root is found the boolean hasArealRoot is set to false and the real part of - * the root with smallest absolute imaginary part is returned instead. - * - * \param[out] hasArealRoot : boolean true if a real root is found according to the - * absImaginaryThreshold criterion, false otherwise. - * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide - * whether or not a root is real. - */ - inline const RealScalar& absSmallestRealRoot( - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - std::less<Scalar> less; - return selectRealRoot_withRespectToAbsRealPart( less, hasArealRoot, absImaginaryThreshold ); - } - - - /** - * \returns the real root with greatest value. - * A real root is defined as the real part of a complex root with absolute imaginary - * part smallest than absImaginaryThreshold. - * absImaginaryThreshold takes the dummy_precision associated - * with the _Scalar template parameter of the PolynomialSolver class as the default value. - * If no real root is found the boolean hasArealRoot is set to false and the real part of - * the root with smallest absolute imaginary part is returned instead. - * - * \param[out] hasArealRoot : boolean true if a real root is found according to the - * absImaginaryThreshold criterion, false otherwise. - * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide - * whether or not a root is real. - */ - inline const RealScalar& greatestRealRoot( - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - std::greater<Scalar> greater; - return selectRealRoot_withRespectToRealPart( greater, hasArealRoot, absImaginaryThreshold ); - } - - - /** - * \returns the real root with smallest value. - * A real root is defined as the real part of a complex root with absolute imaginary - * part smallest than absImaginaryThreshold. - * absImaginaryThreshold takes the dummy_precision associated - * with the _Scalar template parameter of the PolynomialSolver class as the default value. - * If no real root is found the boolean hasArealRoot is set to false and the real part of - * the root with smallest absolute imaginary part is returned instead. - * - * \param[out] hasArealRoot : boolean true if a real root is found according to the - * absImaginaryThreshold criterion, false otherwise. - * \param[in] absImaginaryThreshold : threshold on the absolute imaginary part to decide - * whether or not a root is real. - */ - inline const RealScalar& smallestRealRoot( - bool& hasArealRoot, - const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const - { - std::less<Scalar> less; - return selectRealRoot_withRespectToRealPart( less, hasArealRoot, absImaginaryThreshold ); - } - - protected: - RootsType m_roots; -}; - -#define EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( BASE ) \ - typedef typename BASE::Scalar Scalar; \ - typedef typename BASE::RealScalar RealScalar; \ - typedef typename BASE::RootType RootType; \ - typedef typename BASE::RootsType RootsType; - - - -/** \ingroup Polynomials_Module - * - * \class PolynomialSolver - * - * \brief A polynomial solver - * - * Computes the complex roots of a real polynomial. - * - * \param _Scalar the scalar type, i.e., the type of the polynomial coefficients - * \param _Deg the degree of the polynomial, can be a compile time value or Dynamic. - * Notice that the number of polynomial coefficients is _Deg+1. 
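The four real-root selectors documented above all share the hasArealRoot flag and the absImaginaryThreshold convention. A short sketch in the same setting as the previous example, with made-up coefficients giving one real root and a complex-conjugate pair:

#include <unsupported/Eigen/Polynomials>
#include <iostream>

int main() {
  Eigen::VectorXd p(4);
  p << -1.0, 1.0, -1.0, 1.0;                             // (x - 1)(x^2 + 1)
  Eigen::PolynomialSolver<double, Eigen::Dynamic> solver(p);
  bool hasRealRoot = false;
  double g = solver.greatestRealRoot(hasRealRoot);       // 1, hasRealRoot becomes true
  double a = solver.absGreatestRealRoot(hasRealRoot);    // also 1: only one root passes the imaginary-part test
  std::cout << g << " " << a << " " << hasRealRoot << "\n";
  return 0;
}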
- * - * This class implements a polynomial solver and provides convenient methods such as - * - real roots, - * - greatest, smallest complex roots, - * - real roots with greatest, smallest absolute real value. - * - greatest, smallest real roots. - * - * WARNING: this polynomial solver is experimental, part of the unsupported Eigen modules. - * - * - * Currently a QR algorithm is used to compute the eigenvalues of the companion matrix of - * the polynomial to compute its roots. - * This supposes that the complex moduli of the roots are all distinct: e.g. there should - * be no multiple roots or conjugate roots for instance. - * With 32bit (float) floating types this problem shows up frequently. - * However, almost always, correct accuracy is reached even in these cases for 64bit - * (double) floating types and small polynomial degree (<20). - */ -template< typename _Scalar, int _Deg > -class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg> -{ - public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Deg==Dynamic ? Dynamic : _Deg) - - typedef PolynomialSolverBase<_Scalar,_Deg> PS_Base; - EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( PS_Base ) - - typedef Matrix<Scalar,_Deg,_Deg> CompanionMatrixType; - typedef EigenSolver<CompanionMatrixType> EigenSolverType; - - public: - /** Computes the complex roots of a new polynomial. */ - template< typename OtherPolynomial > - void compute( const OtherPolynomial& poly ) - { - eigen_assert( Scalar(0) != poly[poly.size()-1] ); - eigen_assert( poly.size() > 1 ); - if(poly.size() > 2 ) - { - internal::companion<Scalar,_Deg> companion( poly ); - companion.balance(); - m_eigenSolver.compute( companion.denseMatrix() ); - m_roots = m_eigenSolver.eigenvalues(); - } - else if(poly.size () == 2) - { - m_roots.resize(1); - m_roots[0] = -poly[0]/poly[1]; - } - } - - public: - template< typename OtherPolynomial > - inline PolynomialSolver( const OtherPolynomial& poly ){ - compute( poly ); } - - inline PolynomialSolver(){} - - protected: - using PS_Base::m_roots; - EigenSolverType m_eigenSolver; -}; - - -template< typename _Scalar > -class PolynomialSolver<_Scalar,1> : public PolynomialSolverBase<_Scalar,1> -{ - public: - typedef PolynomialSolverBase<_Scalar,1> PS_Base; - EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( PS_Base ) - - public: - /** Computes the complex roots of a new polynomial. */ - template< typename OtherPolynomial > - void compute( const OtherPolynomial& poly ) - { - eigen_assert( poly.size() == 2 ); - eigen_assert( Scalar(0) != poly[1] ); - m_roots[0] = -poly[0]/poly[1]; - } - - public: - template< typename OtherPolynomial > - inline PolynomialSolver( const OtherPolynomial& poly ){ - compute( poly ); } - - inline PolynomialSolver(){} - - protected: - using PS_Base::m_roots; -}; - -} // end namespace Eigen - -#endif // EIGEN_POLYNOMIAL_SOLVER_H diff --git a/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h b/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h deleted file mode 100644 index 394e857..0000000 --- a/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h +++ /dev/null @@ -1,143 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Manuel Yguel <manuel.yguel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_POLYNOMIAL_UTILS_H -#define EIGEN_POLYNOMIAL_UTILS_H - -namespace Eigen { - -/** \ingroup Polynomials_Module - * \returns the evaluation of the polynomial at x using Horner algorithm. - * - * \param[in] poly : the vector of coefficients of the polynomial ordered - * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial - * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. - * \param[in] x : the value to evaluate the polynomial at. - * - * \note for stability: - * \f$ |x| \le 1 \f$ - */ -template <typename Polynomials, typename T> -inline -T poly_eval_horner( const Polynomials& poly, const T& x ) -{ - T val=poly[poly.size()-1]; - for(DenseIndex i=poly.size()-2; i>=0; --i ){ - val = val*x + poly[i]; } - return val; -} - -/** \ingroup Polynomials_Module - * \returns the evaluation of the polynomial at x using stabilized Horner algorithm. - * - * \param[in] poly : the vector of coefficients of the polynomial ordered - * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial - * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. - * \param[in] x : the value to evaluate the polynomial at. - */ -template <typename Polynomials, typename T> -inline -T poly_eval( const Polynomials& poly, const T& x ) -{ - typedef typename NumTraits<T>::Real Real; - - if( numext::abs2( x ) <= Real(1) ){ - return poly_eval_horner( poly, x ); } - else - { - T val=poly[0]; - T inv_x = T(1)/x; - for( DenseIndex i=1; i<poly.size(); ++i ){ - val = val*inv_x + poly[i]; } - - return numext::pow(x,(T)(poly.size()-1)) * val; - } -} - -/** \ingroup Polynomials_Module - * \returns a maximum bound for the absolute value of any root of the polynomial. - * - * \param[in] poly : the vector of coefficients of the polynomial ordered - * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial - * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. - * - * \pre - * the leading coefficient of the input polynomial poly must be non zero - */ -template <typename Polynomial> -inline -typename NumTraits<typename Polynomial::Scalar>::Real cauchy_max_bound( const Polynomial& poly ) -{ - using std::abs; - typedef typename Polynomial::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real Real; - - eigen_assert( Scalar(0) != poly[poly.size()-1] ); - const Scalar inv_leading_coeff = Scalar(1)/poly[poly.size()-1]; - Real cb(0); - - for( DenseIndex i=0; i<poly.size()-1; ++i ){ - cb += abs(poly[i]*inv_leading_coeff); } - return cb + Real(1); -} - -/** \ingroup Polynomials_Module - * \returns a minimum bound for the absolute value of any non zero root of the polynomial. - * \param[in] poly : the vector of coefficients of the polynomial ordered - * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial - * e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$. 
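The helpers above are free functions in the Eigen namespace. A quick sketch using the polynomial 1 + 3x^2 from the doc comments, assuming the <unsupported/Eigen/Polynomials> header is available:

#include <unsupported/Eigen/Polynomials>
#include <iostream>

int main() {
  Eigen::Vector3d poly(1.0, 0.0, 3.0);                     // 1 + 3x^2, ordered by increasing degree
  std::cout << Eigen::poly_eval_horner(poly, 0.5) << "\n"; // 1.75
  std::cout << Eigen::poly_eval(poly, 2.0) << "\n";        // 13, via the stabilized branch since |x| > 1
  std::cout << Eigen::cauchy_max_bound(poly) << "\n";      // 1 + (1 + 0)/3 = 4/3; every root has modulus below this
  return 0;
}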
- */ -template <typename Polynomial> -inline -typename NumTraits<typename Polynomial::Scalar>::Real cauchy_min_bound( const Polynomial& poly ) -{ - using std::abs; - typedef typename Polynomial::Scalar Scalar; - typedef typename NumTraits<Scalar>::Real Real; - - DenseIndex i=0; - while( i<poly.size()-1 && Scalar(0) == poly(i) ){ ++i; } - if( poly.size()-1 == i ){ - return Real(1); } - - const Scalar inv_min_coeff = Scalar(1)/poly[i]; - Real cb(1); - for( DenseIndex j=i+1; j<poly.size(); ++j ){ - cb += abs(poly[j]*inv_min_coeff); } - return Real(1)/cb; -} - -/** \ingroup Polynomials_Module - * Given the roots of a polynomial compute the coefficients in the - * monomial basis of the monic polynomial with same roots and minimal degree. - * If RootVector is a vector of complexes, Polynomial should also be a vector - * of complexes. - * \param[in] rv : a vector containing the roots of a polynomial. - * \param[out] poly : the vector of coefficients of the polynomial ordered - * by degrees i.e. poly[i] is the coefficient of degree i of the polynomial - * e.g. \f$ 3 + x^2 \f$ is stored as a vector \f$ [ 3, 0, 1 ] \f$. - */ -template <typename RootVector, typename Polynomial> -void roots_to_monicPolynomial( const RootVector& rv, Polynomial& poly ) -{ - - typedef typename Polynomial::Scalar Scalar; - - poly.setZero( rv.size()+1 ); - poly[0] = -rv[0]; poly[1] = Scalar(1); - for( DenseIndex i=1; i< rv.size(); ++i ) - { - for( DenseIndex j=i+1; j>0; --j ){ poly[j] = poly[j-1] - rv[i]*poly[j]; } - poly[0] = -rv[i]*poly[0]; - } -} - -} // end namespace Eigen - -#endif // EIGEN_POLYNOMIAL_UTILS_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h b/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h deleted file mode 100644 index a1f54ed..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h +++ /dev/null @@ -1,352 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SKYLINEINPLACELU_H -#define EIGEN_SKYLINEINPLACELU_H - -namespace Eigen { - -/** \ingroup Skyline_Module - * - * \class SkylineInplaceLU - * - * \brief Inplace LU decomposition of a skyline matrix and associated features - * - * \param MatrixType the type of the matrix of which we are computing the LU factorization - * - */ -template<typename MatrixType> -class SkylineInplaceLU { -protected: - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - -public: - - /** Creates a LU object and compute the respective factorization of \a matrix using - * flags \a flags. */ - SkylineInplaceLU(MatrixType& matrix, int flags = 0) - : /*m_matrix(matrix.rows(), matrix.cols()),*/ m_flags(flags), m_status(0), m_lu(matrix) { - m_precision = RealScalar(0.1) * Eigen::dummy_precision<RealScalar > (); - m_lu.IsRowMajor ? computeRowMajor() : compute(); - } - - /** Sets the relative threshold value used to prune zero coefficients during the decomposition. - * - * Setting a value greater than zero speeds up computation, and yields to an imcomplete - * factorization with fewer non zero coefficients. Such approximate factors are especially - * useful to initialize an iterative solver. 
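roots_to_monicPolynomial goes in the opposite direction of the solver: given the roots, it rebuilds the coefficients of the monic polynomial having those roots. A brief sketch with illustrative root values:

#include <unsupported/Eigen/Polynomials>
#include <iostream>

int main() {
  Eigen::Vector3d roots(1.0, 2.0, 3.0);
  Eigen::VectorXd poly;
  Eigen::roots_to_monicPolynomial(roots, poly);           // -6 + 11x - 6x^2 + x^3
  std::cout << poly.transpose() << "\n";                  // -6 11 -6 1
  return 0;
}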
- * - * Note that the exact meaning of this parameter might depends on the actual - * backend. Moreover, not all backends support this feature. - * - * \sa precision() */ - void setPrecision(RealScalar v) { - m_precision = v; - } - - /** \returns the current precision. - * - * \sa setPrecision() */ - RealScalar precision() const { - return m_precision; - } - - /** Sets the flags. Possible values are: - * - CompleteFactorization - * - IncompleteFactorization - * - MemoryEfficient - * - one of the ordering methods - * - etc... - * - * \sa flags() */ - void setFlags(int f) { - m_flags = f; - } - - /** \returns the current flags */ - int flags() const { - return m_flags; - } - - void setOrderingMethod(int m) { - m_flags = m; - } - - int orderingMethod() const { - return m_flags; - } - - /** Computes/re-computes the LU factorization */ - void compute(); - void computeRowMajor(); - - /** \returns the lower triangular matrix L */ - //inline const MatrixType& matrixL() const { return m_matrixL; } - - /** \returns the upper triangular matrix U */ - //inline const MatrixType& matrixU() const { return m_matrixU; } - - template<typename BDerived, typename XDerived> - bool solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x, - const int transposed = 0) const; - - /** \returns true if the factorization succeeded */ - inline bool succeeded(void) const { - return m_succeeded; - } - -protected: - RealScalar m_precision; - int m_flags; - mutable int m_status; - bool m_succeeded; - MatrixType& m_lu; -}; - -/** Computes / recomputes the in place LU decomposition of the SkylineInplaceLU. - * using the default algorithm. - */ -template<typename MatrixType> -//template<typename _Scalar> -void SkylineInplaceLU<MatrixType>::compute() { - const size_t rows = m_lu.rows(); - const size_t cols = m_lu.cols(); - - eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); - eigen_assert(!m_lu.IsRowMajor && "LU decomposition does not work with rowMajor Storage"); - - for (Index row = 0; row < rows; row++) { - const double pivot = m_lu.coeffDiag(row); - - //Lower matrix Columns update - const Index& col = row; - for (typename MatrixType::InnerLowerIterator lIt(m_lu, col); lIt; ++lIt) { - lIt.valueRef() /= pivot; - } - - //Upper matrix update -> contiguous memory access - typename MatrixType::InnerLowerIterator lIt(m_lu, col); - for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { - typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); - typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); - const double coef = lIt.value(); - - uItPivot += (rrow - row - 1); - - //update upper part -> contiguous memory access - for (++uItPivot; uIt && uItPivot;) { - uIt.valueRef() -= uItPivot.value() * coef; - - ++uIt; - ++uItPivot; - } - ++lIt; - } - - //Upper matrix update -> non contiguous memory access - typename MatrixType::InnerLowerIterator lIt3(m_lu, col); - for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { - typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); - const double coef = lIt3.value(); - - //update lower part -> non contiguous memory access - for (Index i = 0; i < rrow - row - 1; i++) { - m_lu.coeffRefLower(rrow, row + i + 1) -= uItPivot.value() * coef; - ++uItPivot; - } - ++lIt3; - } - //update diag -> contiguous - typename MatrixType::InnerLowerIterator lIt2(m_lu, col); - for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { - - typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); - typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); - const double 
coef = lIt2.value(); - - uItPivot += (rrow - row - 1); - m_lu.coeffRefDiag(rrow) -= uItPivot.value() * coef; - ++lIt2; - } - } -} - -template<typename MatrixType> -void SkylineInplaceLU<MatrixType>::computeRowMajor() { - const size_t rows = m_lu.rows(); - const size_t cols = m_lu.cols(); - - eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); - eigen_assert(m_lu.IsRowMajor && "You're trying to apply rowMajor decomposition on a ColMajor matrix !"); - - for (Index row = 0; row < rows; row++) { - typename MatrixType::InnerLowerIterator llIt(m_lu, row); - - - for (Index col = llIt.col(); col < row; col++) { - if (m_lu.coeffExistLower(row, col)) { - const double diag = m_lu.coeffDiag(col); - - typename MatrixType::InnerLowerIterator lIt(m_lu, row); - typename MatrixType::InnerUpperIterator uIt(m_lu, col); - - - const Index offset = lIt.col() - uIt.row(); - - - Index stop = offset > 0 ? col - lIt.col() : col - uIt.row(); - - //#define VECTORIZE -#ifdef VECTORIZE - Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); - Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); - - - Scalar newCoeff = m_lu.coeffLower(row, col) - rowVal.dot(colVal); -#else - if (offset > 0) //Skip zero value of lIt - uIt += offset; - else //Skip zero values of uIt - lIt += -offset; - Scalar newCoeff = m_lu.coeffLower(row, col); - - for (Index k = 0; k < stop; ++k) { - const Scalar tmp = newCoeff; - newCoeff = tmp - lIt.value() * uIt.value(); - ++lIt; - ++uIt; - } -#endif - - m_lu.coeffRefLower(row, col) = newCoeff / diag; - } - } - - //Upper matrix update - const Index col = row; - typename MatrixType::InnerUpperIterator uuIt(m_lu, col); - for (Index rrow = uuIt.row(); rrow < col; rrow++) { - - typename MatrixType::InnerLowerIterator lIt(m_lu, rrow); - typename MatrixType::InnerUpperIterator uIt(m_lu, col); - const Index offset = lIt.col() - uIt.row(); - - Index stop = offset > 0 ? rrow - lIt.col() : rrow - uIt.row(); - -#ifdef VECTORIZE - Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); - Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); - - Scalar newCoeff = m_lu.coeffUpper(rrow, col) - rowVal.dot(colVal); -#else - if (offset > 0) //Skip zero value of lIt - uIt += offset; - else //Skip zero values of uIt - lIt += -offset; - Scalar newCoeff = m_lu.coeffUpper(rrow, col); - for (Index k = 0; k < stop; ++k) { - const Scalar tmp = newCoeff; - newCoeff = tmp - lIt.value() * uIt.value(); - - ++lIt; - ++uIt; - } -#endif - m_lu.coeffRefUpper(rrow, col) = newCoeff; - } - - - //Diag matrix update - typename MatrixType::InnerLowerIterator lIt(m_lu, row); - typename MatrixType::InnerUpperIterator uIt(m_lu, row); - - const Index offset = lIt.col() - uIt.row(); - - - Index stop = offset > 0 ? lIt.size() : uIt.size(); -#ifdef VECTORIZE - Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); - Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? 
offset : 0), stop); - Scalar newCoeff = m_lu.coeffDiag(row) - rowVal.dot(colVal); -#else - if (offset > 0) //Skip zero value of lIt - uIt += offset; - else //Skip zero values of uIt - lIt += -offset; - Scalar newCoeff = m_lu.coeffDiag(row); - for (Index k = 0; k < stop; ++k) { - const Scalar tmp = newCoeff; - newCoeff = tmp - lIt.value() * uIt.value(); - ++lIt; - ++uIt; - } -#endif - m_lu.coeffRefDiag(row) = newCoeff; - } -} - -/** Computes *x = U^-1 L^-1 b - * - * If \a transpose is set to SvTranspose or SvAdjoint, the solution - * of the transposed/adjoint system is computed instead. - * - * Not all backends implement the solution of the transposed or - * adjoint system. - */ -template<typename MatrixType> -template<typename BDerived, typename XDerived> -bool SkylineInplaceLU<MatrixType>::solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x, const int transposed) const { - const size_t rows = m_lu.rows(); - const size_t cols = m_lu.cols(); - - - for (Index row = 0; row < rows; row++) { - x->coeffRef(row) = b.coeff(row); - Scalar newVal = x->coeff(row); - typename MatrixType::InnerLowerIterator lIt(m_lu, row); - - Index col = lIt.col(); - while (lIt.col() < row) { - - newVal -= x->coeff(col++) * lIt.value(); - ++lIt; - } - - x->coeffRef(row) = newVal; - } - - - for (Index col = rows - 1; col > 0; col--) { - x->coeffRef(col) = x->coeff(col) / m_lu.coeffDiag(col); - - const Scalar x_col = x->coeff(col); - - typename MatrixType::InnerUpperIterator uIt(m_lu, col); - uIt += uIt.size()-1; - - - while (uIt) { - x->coeffRef(uIt.row()) -= x_col * uIt.value(); - //TODO : introduce --operator - uIt += -1; - } - - - } - x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0); - - return true; -} - -} // end namespace Eigen - -#endif // EIGEN_SKYLINELU_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h b/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h deleted file mode 100644 index a2a8933..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h +++ /dev/null @@ -1,862 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SKYLINEMATRIX_H -#define EIGEN_SKYLINEMATRIX_H - -#include "SkylineStorage.h" -#include "SkylineMatrixBase.h" - -namespace Eigen { - -/** \ingroup Skyline_Module - * - * \class SkylineMatrix - * - * \brief The main skyline matrix class - * - * This class implements a skyline matrix using the very uncommon storage - * scheme. - * - * \param _Scalar the scalar type, i.e. the type of the coefficients - * \param _Options Union of bit flags controlling the storage scheme. Currently the only possibility - * is RowMajor. The default is 0 which means column-major. 
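The in-place LU above is meant to operate on a SkylineMatrix, whose definition follows. An untested, hedged usage sketch, assuming the <unsupported/Eigen/Skyline> module header is available and that the default column-major options apply; the matrix values are made up:

#include <Eigen/Dense>
#include <unsupported/Eigen/Skyline>
#include <iostream>

int main() {
  Eigen::SkylineMatrix<double> A(3, 3);                   // square, as required by resize()
  A.insert(0, 0) = 4; A.insert(1, 1) = 4; A.insert(2, 2) = 4;
  A.insert(1, 0) = 1; A.insert(0, 1) = 1;                 // one sub-/super-diagonal pair
  A.finalize();                                           // mandatory after an insertion session

  Eigen::VectorXd b(3), x(3);
  b << 1, 2, 3;
  Eigen::SkylineInplaceLU<Eigen::SkylineMatrix<double> > lu(A);  // factorization runs in the constructor
  lu.solve(b, &x);                                        // forward/backward substitution shown above
  std::cout << x.transpose() << "\n";
  return 0;
}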
- * - * - */ -namespace internal { -template<typename _Scalar, int _Options> -struct traits<SkylineMatrix<_Scalar, _Options> > { - typedef _Scalar Scalar; - typedef Sparse StorageKind; - - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - Flags = SkylineBit | _Options, - CoeffReadCost = NumTraits<Scalar>::ReadCost, - }; -}; -} - -template<typename _Scalar, int _Options> -class SkylineMatrix -: public SkylineMatrixBase<SkylineMatrix<_Scalar, _Options> > { -public: - EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(SkylineMatrix) - EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(SkylineMatrix, +=) - EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(SkylineMatrix, -=) - - using Base::IsRowMajor; - -protected: - - typedef SkylineMatrix<Scalar, (Flags&~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0) > TransposedSkylineMatrix; - - Index m_outerSize; - Index m_innerSize; - -public: - Index* m_colStartIndex; - Index* m_rowStartIndex; - SkylineStorage<Scalar> m_data; - -public: - - inline Index rows() const { - return IsRowMajor ? m_outerSize : m_innerSize; - } - - inline Index cols() const { - return IsRowMajor ? m_innerSize : m_outerSize; - } - - inline Index innerSize() const { - return m_innerSize; - } - - inline Index outerSize() const { - return m_outerSize; - } - - inline Index upperNonZeros() const { - return m_data.upperSize(); - } - - inline Index lowerNonZeros() const { - return m_data.lowerSize(); - } - - inline Index upperNonZeros(Index j) const { - return m_colStartIndex[j + 1] - m_colStartIndex[j]; - } - - inline Index lowerNonZeros(Index j) const { - return m_rowStartIndex[j + 1] - m_rowStartIndex[j]; - } - - inline const Scalar* _diagPtr() const { - return &m_data.diag(0); - } - - inline Scalar* _diagPtr() { - return &m_data.diag(0); - } - - inline const Scalar* _upperPtr() const { - return &m_data.upper(0); - } - - inline Scalar* _upperPtr() { - return &m_data.upper(0); - } - - inline const Scalar* _lowerPtr() const { - return &m_data.lower(0); - } - - inline Scalar* _lowerPtr() { - return &m_data.lower(0); - } - - inline const Index* _upperProfilePtr() const { - return &m_data.upperProfile(0); - } - - inline Index* _upperProfilePtr() { - return &m_data.upperProfile(0); - } - - inline const Index* _lowerProfilePtr() const { - return &m_data.lowerProfile(0); - } - - inline Index* _lowerProfilePtr() { - return &m_data.lowerProfile(0); - } - - inline Scalar coeff(Index row, Index col) const { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - - if (outer == inner) - return this->m_data.diag(outer); - - if (IsRowMajor) { - if (inner > outer) //upper matrix - { - const Index minOuterIndex = inner - m_data.upperProfile(inner); - if (outer >= minOuterIndex) - return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); - else - return Scalar(0); - } - if (inner < outer) //lower matrix - { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - if (inner >= minInnerIndex) - return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); - else - return Scalar(0); - } - return m_data.upper(m_colStartIndex[inner] + outer - inner); - } else { - if (outer > inner) //upper matrix - { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - if (outer <= maxOuterIndex) - return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); - else - return Scalar(0); - } - if (outer < inner) //lower matrix - { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - - if (inner <= maxInnerIndex) - return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); - else - return Scalar(0); - } - } - } - - inline Scalar& coeffRef(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - - if (outer == inner) - return this->m_data.diag(outer); - - if (IsRowMajor) { - if (col > row) //upper matrix - { - const Index minOuterIndex = inner - m_data.upperProfile(inner); - eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); - } - if (col < row) //lower matrix - { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); - } - } else { - if (outer > inner) //upper matrix - { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); - } - if (outer < inner) //lower matrix - { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); - } - } - } - - inline Scalar coeffDiag(Index idx) const { - eigen_assert(idx < outerSize()); - eigen_assert(idx < innerSize()); - return this->m_data.diag(idx); - } - - inline Scalar coeffLower(Index row, Index col) const { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - if (inner >= minInnerIndex) - return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); - else - return Scalar(0); - - } else { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - if (inner <= maxInnerIndex) - return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); - else - return Scalar(0); - } - } - - inline Scalar coeffUpper(Index row, Index col) const { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minOuterIndex = inner - m_data.upperProfile(inner); - if (outer >= minOuterIndex) - return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); - else - return Scalar(0); - } else { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - if (outer <= maxOuterIndex) - return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); - else - return Scalar(0); - } - } - - inline Scalar& coeffRefDiag(Index idx) { - eigen_assert(idx < outerSize()); - eigen_assert(idx < innerSize()); - return this->m_data.diag(idx); - } - - inline Scalar& coeffRefLower(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); - } else { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); - } - } - - inline bool coeffExistLower(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - return inner >= minInnerIndex; - } else { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - return inner <= maxInnerIndex; - } - } - - inline Scalar& coeffRefUpper(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minOuterIndex = inner - m_data.upperProfile(inner); - eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); - } else { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); - return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); - } - } - - inline bool coeffExistUpper(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - eigen_assert(inner != outer); - - if (IsRowMajor) { - const Index minOuterIndex = inner - m_data.upperProfile(inner); - return outer >= minOuterIndex; - } else { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - return outer <= maxOuterIndex; - } - } - - -protected: - -public: - class InnerUpperIterator; - class InnerLowerIterator; - - class OuterUpperIterator; - class OuterLowerIterator; - - /** Removes all non zeros */ - inline void setZero() { - m_data.clear(); - memset(m_colStartIndex, 0, (m_outerSize + 1) * sizeof (Index)); - memset(m_rowStartIndex, 0, (m_outerSize + 1) * sizeof (Index)); - } - - /** \returns the number of non zero coefficients */ - inline Index nonZeros() const { - return m_data.diagSize() + m_data.upperSize() + m_data.lowerSize(); - } - - /** Preallocates \a reserveSize non zeros */ - inline void reserve(Index reserveSize, Index reserveUpperSize, Index reserveLowerSize) { - m_data.reserve(reserveSize, reserveUpperSize, reserveLowerSize); - } - - /** \returns a reference to a novel non zero coefficient with coordinates \a row x \a col. - - * - * \warning This function can be extremely slow if the non zero coefficients - * are not inserted in a coherent order. - * - * After an insertion session, you should call the finalize() function. - */ - EIGEN_DONT_INLINE Scalar & insert(Index row, Index col) { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - eigen_assert(outer < outerSize()); - eigen_assert(inner < innerSize()); - - if (outer == inner) - return m_data.diag(col); - - if (IsRowMajor) { - if (outer < inner) //upper matrix - { - Index minOuterIndex = 0; - minOuterIndex = inner - m_data.upperProfile(inner); - - if (outer < minOuterIndex) //The value does not yet exist - { - const Index previousProfile = m_data.upperProfile(inner); - - m_data.upperProfile(inner) = inner - outer; - - - const Index bandIncrement = m_data.upperProfile(inner) - previousProfile; - //shift data stored after this new one - const Index stop = m_colStartIndex[cols()]; - const Index start = m_colStartIndex[inner]; - - - for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { - m_data.upper(innerIdx + bandIncrement) = m_data.upper(innerIdx); - } - - for (Index innerIdx = cols(); innerIdx > inner; innerIdx--) { - m_colStartIndex[innerIdx] += bandIncrement; - } - - //zeros new data - memset(this->_upperPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar)); - - return m_data.upper(m_colStartIndex[inner]); - } else { - return m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); - } - } - - if (outer > inner) //lower matrix - { - const Index minInnerIndex = outer - m_data.lowerProfile(outer); - if (inner < minInnerIndex) //The value does not yet exist - { - const Index previousProfile = m_data.lowerProfile(outer); - m_data.lowerProfile(outer) = outer - inner; - - const Index bandIncrement = m_data.lowerProfile(outer) - previousProfile; - //shift data stored after this new one - const Index stop = m_rowStartIndex[rows()]; - const Index start = m_rowStartIndex[outer]; - - - for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { - m_data.lower(innerIdx + bandIncrement) = m_data.lower(innerIdx); - } - - for (Index innerIdx = rows(); innerIdx > outer; innerIdx--) { - m_rowStartIndex[innerIdx] += bandIncrement; - } - - //zeros new data - memset(this->_lowerPtr() + start, 0, (bandIncrement - 1) * sizeof (Scalar)); - return m_data.lower(m_rowStartIndex[outer]); - } else { - return m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); - } - } - } else { - if (outer > inner) //upper matrix - { - const Index maxOuterIndex = inner + m_data.upperProfile(inner); - if (outer > maxOuterIndex) //The value does not yet exist - { - const Index previousProfile = m_data.upperProfile(inner); - m_data.upperProfile(inner) = outer - inner; - - const Index bandIncrement = m_data.upperProfile(inner) - previousProfile; - //shift data stored after this new one - const Index stop = m_rowStartIndex[rows()]; - const Index start = m_rowStartIndex[inner + 1]; - - for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { - m_data.upper(innerIdx + bandIncrement) = m_data.upper(innerIdx); - } - - for (Index innerIdx = inner + 1; innerIdx < outerSize() + 1; innerIdx++) { - m_rowStartIndex[innerIdx] += bandIncrement; - } - memset(this->_upperPtr() + m_rowStartIndex[inner] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar)); - return m_data.upper(m_rowStartIndex[inner] + m_data.upperProfile(inner)); - } else { - return m_data.upper(m_rowStartIndex[inner] + (outer - inner)); - } - } - - if (outer < inner) //lower matrix - { - const Index maxInnerIndex = outer + m_data.lowerProfile(outer); - if (inner > maxInnerIndex) //The value does not yet exist - { - const Index previousProfile = m_data.lowerProfile(outer); - m_data.lowerProfile(outer) = inner - outer; - - const Index bandIncrement = 
m_data.lowerProfile(outer) - previousProfile; - //shift data stored after this new one - const Index stop = m_colStartIndex[cols()]; - const Index start = m_colStartIndex[outer + 1]; - - for (Index innerIdx = stop; innerIdx >= start; innerIdx--) { - m_data.lower(innerIdx + bandIncrement) = m_data.lower(innerIdx); - } - - for (Index innerIdx = outer + 1; innerIdx < outerSize() + 1; innerIdx++) { - m_colStartIndex[innerIdx] += bandIncrement; - } - memset(this->_lowerPtr() + m_colStartIndex[outer] + previousProfile + 1, 0, (bandIncrement - 1) * sizeof (Scalar)); - return m_data.lower(m_colStartIndex[outer] + m_data.lowerProfile(outer)); - } else { - return m_data.lower(m_colStartIndex[outer] + (inner - outer)); - } - } - } - } - - /** Must be called after inserting a set of non zero entries. - */ - inline void finalize() { - if (IsRowMajor) { - if (rows() > cols()) - m_data.resize(cols(), cols(), rows(), m_colStartIndex[cols()] + 1, m_rowStartIndex[rows()] + 1); - else - m_data.resize(rows(), cols(), rows(), m_colStartIndex[cols()] + 1, m_rowStartIndex[rows()] + 1); - - // eigen_assert(rows() == cols() && "memory reorganisatrion only works with suare matrix"); - // - // Scalar* newArray = new Scalar[m_colStartIndex[cols()] + 1 + m_rowStartIndex[rows()] + 1]; - // Index dataIdx = 0; - // for (Index row = 0; row < rows(); row++) { - // - // const Index nbLowerElts = m_rowStartIndex[row + 1] - m_rowStartIndex[row]; - // // std::cout << "nbLowerElts" << nbLowerElts << std::endl; - // memcpy(newArray + dataIdx, m_data.m_lower + m_rowStartIndex[row], nbLowerElts * sizeof (Scalar)); - // m_rowStartIndex[row] = dataIdx; - // dataIdx += nbLowerElts; - // - // const Index nbUpperElts = m_colStartIndex[row + 1] - m_colStartIndex[row]; - // memcpy(newArray + dataIdx, m_data.m_upper + m_colStartIndex[row], nbUpperElts * sizeof (Scalar)); - // m_colStartIndex[row] = dataIdx; - // dataIdx += nbUpperElts; - // - // - // } - // //todo : don't access m_data profile directly : add an accessor from SkylineMatrix - // m_rowStartIndex[rows()] = m_rowStartIndex[rows()-1] + m_data.lowerProfile(rows()-1); - // m_colStartIndex[cols()] = m_colStartIndex[cols()-1] + m_data.upperProfile(cols()-1); - // - // delete[] m_data.m_lower; - // delete[] m_data.m_upper; - // - // m_data.m_lower = newArray; - // m_data.m_upper = newArray; - } else { - if (rows() > cols()) - m_data.resize(cols(), rows(), cols(), m_rowStartIndex[cols()] + 1, m_colStartIndex[cols()] + 1); - else - m_data.resize(rows(), rows(), cols(), m_rowStartIndex[rows()] + 1, m_colStartIndex[rows()] + 1); - } - } - - inline void squeeze() { - finalize(); - m_data.squeeze(); - } - - void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar > ()) { - //TODO - } - - /** Resizes the matrix to a \a rows x \a cols matrix and initializes it to zero - * \sa resizeNonZeros(Index), reserve(), setZero() - */ - void resize(size_t rows, size_t cols) { - const Index diagSize = rows > cols ? cols : rows; - m_innerSize = IsRowMajor ? cols : rows; - - eigen_assert(rows == cols && "Skyline matrix must be square matrix"); - - if (diagSize % 2) { // diagSize is odd - const Index k = (diagSize - 1) / 2; - - m_data.resize(diagSize, IsRowMajor ? cols : rows, IsRowMajor ? rows : cols, - 2 * k * k + k + 1, - 2 * k * k + k + 1); - - } else // diagSize is even - { - const Index k = diagSize / 2; - m_data.resize(diagSize, IsRowMajor ? cols : rows, IsRowMajor ? 
rows : cols, - 2 * k * k - k + 1, - 2 * k * k - k + 1); - } - - if (m_colStartIndex && m_rowStartIndex) { - delete[] m_colStartIndex; - delete[] m_rowStartIndex; - } - m_colStartIndex = new Index [cols + 1]; - m_rowStartIndex = new Index [rows + 1]; - m_outerSize = diagSize; - - m_data.reset(); - m_data.clear(); - - m_outerSize = diagSize; - memset(m_colStartIndex, 0, (cols + 1) * sizeof (Index)); - memset(m_rowStartIndex, 0, (rows + 1) * sizeof (Index)); - } - - void resizeNonZeros(Index size) { - m_data.resize(size); - } - - inline SkylineMatrix() - : m_outerSize(-1), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { - resize(0, 0); - } - - inline SkylineMatrix(size_t rows, size_t cols) - : m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { - resize(rows, cols); - } - - template<typename OtherDerived> - inline SkylineMatrix(const SkylineMatrixBase<OtherDerived>& other) - : m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { - *this = other.derived(); - } - - inline SkylineMatrix(const SkylineMatrix & other) - : Base(), m_outerSize(0), m_innerSize(0), m_colStartIndex(0), m_rowStartIndex(0) { - *this = other.derived(); - } - - inline void swap(SkylineMatrix & other) { - //EIGEN_DBG_SKYLINE(std::cout << "SkylineMatrix:: swap\n"); - std::swap(m_colStartIndex, other.m_colStartIndex); - std::swap(m_rowStartIndex, other.m_rowStartIndex); - std::swap(m_innerSize, other.m_innerSize); - std::swap(m_outerSize, other.m_outerSize); - m_data.swap(other.m_data); - } - - inline SkylineMatrix & operator=(const SkylineMatrix & other) { - std::cout << "SkylineMatrix& operator=(const SkylineMatrix& other)\n"; - if (other.isRValue()) { - swap(other.const_cast_derived()); - } else { - resize(other.rows(), other.cols()); - memcpy(m_colStartIndex, other.m_colStartIndex, (m_outerSize + 1) * sizeof (Index)); - memcpy(m_rowStartIndex, other.m_rowStartIndex, (m_outerSize + 1) * sizeof (Index)); - m_data = other.m_data; - } - return *this; - } - - template<typename OtherDerived> - inline SkylineMatrix & operator=(const SkylineMatrixBase<OtherDerived>& other) { - const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - if (needToTranspose) { - // TODO - // return *this; - } else { - // there is no special optimization - return SkylineMatrixBase<SkylineMatrix>::operator=(other.derived()); - } - } - - friend std::ostream & operator <<(std::ostream & s, const SkylineMatrix & m) { - - EIGEN_DBG_SKYLINE( - std::cout << "upper elements : " << std::endl; - for (Index i = 0; i < m.m_data.upperSize(); i++) - std::cout << m.m_data.upper(i) << "\t"; - std::cout << std::endl; - std::cout << "upper profile : " << std::endl; - for (Index i = 0; i < m.m_data.upperProfileSize(); i++) - std::cout << m.m_data.upperProfile(i) << "\t"; - std::cout << std::endl; - std::cout << "lower startIdx : " << std::endl; - for (Index i = 0; i < m.m_data.upperProfileSize(); i++) - std::cout << (IsRowMajor ? m.m_colStartIndex[i] : m.m_rowStartIndex[i]) << "\t"; - std::cout << std::endl; - - - std::cout << "lower elements : " << std::endl; - for (Index i = 0; i < m.m_data.lowerSize(); i++) - std::cout << m.m_data.lower(i) << "\t"; - std::cout << std::endl; - std::cout << "lower profile : " << std::endl; - for (Index i = 0; i < m.m_data.lowerProfileSize(); i++) - std::cout << m.m_data.lowerProfile(i) << "\t"; - std::cout << std::endl; - std::cout << "lower startIdx : " << std::endl; - for (Index i = 0; i < m.m_data.lowerProfileSize(); i++) - std::cout << (IsRowMajor ? 
m.m_rowStartIndex[i] : m.m_colStartIndex[i]) << "\t"; - std::cout << std::endl; - ); - for (Index rowIdx = 0; rowIdx < m.rows(); rowIdx++) { - for (Index colIdx = 0; colIdx < m.cols(); colIdx++) { - s << m.coeff(rowIdx, colIdx) << "\t"; - } - s << std::endl; - } - return s; - } - - /** Destructor */ - inline ~SkylineMatrix() { - delete[] m_colStartIndex; - delete[] m_rowStartIndex; - } - - /** Overloaded for performance */ - Scalar sum() const; -}; - -template<typename Scalar, int _Options> -class SkylineMatrix<Scalar, _Options>::InnerUpperIterator { -public: - - InnerUpperIterator(const SkylineMatrix& mat, Index outer) - : m_matrix(mat), m_outer(outer), - m_id(_Options == RowMajor ? mat.m_colStartIndex[outer] : mat.m_rowStartIndex[outer] + 1), - m_start(m_id), - m_end(_Options == RowMajor ? mat.m_colStartIndex[outer + 1] : mat.m_rowStartIndex[outer + 1] + 1) { - } - - inline InnerUpperIterator & operator++() { - m_id++; - return *this; - } - - inline InnerUpperIterator & operator+=(Index shift) { - m_id += shift; - return *this; - } - - inline Scalar value() const { - return m_matrix.m_data.upper(m_id); - } - - inline Scalar* valuePtr() { - return const_cast<Scalar*> (&(m_matrix.m_data.upper(m_id))); - } - - inline Scalar& valueRef() { - return const_cast<Scalar&> (m_matrix.m_data.upper(m_id)); - } - - inline Index index() const { - return IsRowMajor ? m_outer - m_matrix.m_data.upperProfile(m_outer) + (m_id - m_start) : - m_outer + (m_id - m_start) + 1; - } - - inline Index row() const { - return IsRowMajor ? index() : m_outer; - } - - inline Index col() const { - return IsRowMajor ? m_outer : index(); - } - - inline size_t size() const { - return m_matrix.m_data.upperProfile(m_outer); - } - - inline operator bool() const { - return (m_id < m_end) && (m_id >= m_start); - } - -protected: - const SkylineMatrix& m_matrix; - const Index m_outer; - Index m_id; - const Index m_start; - const Index m_end; -}; - -template<typename Scalar, int _Options> -class SkylineMatrix<Scalar, _Options>::InnerLowerIterator { -public: - - InnerLowerIterator(const SkylineMatrix& mat, Index outer) - : m_matrix(mat), - m_outer(outer), - m_id(_Options == RowMajor ? mat.m_rowStartIndex[outer] : mat.m_colStartIndex[outer] + 1), - m_start(m_id), - m_end(_Options == RowMajor ? mat.m_rowStartIndex[outer + 1] : mat.m_colStartIndex[outer + 1] + 1) { - } - - inline InnerLowerIterator & operator++() { - m_id++; - return *this; - } - - inline InnerLowerIterator & operator+=(Index shift) { - m_id += shift; - return *this; - } - - inline Scalar value() const { - return m_matrix.m_data.lower(m_id); - } - - inline Scalar* valuePtr() { - return const_cast<Scalar*> (&(m_matrix.m_data.lower(m_id))); - } - - inline Scalar& valueRef() { - return const_cast<Scalar&> (m_matrix.m_data.lower(m_id)); - } - - inline Index index() const { - return IsRowMajor ? m_outer - m_matrix.m_data.lowerProfile(m_outer) + (m_id - m_start) : - m_outer + (m_id - m_start) + 1; - ; - } - - inline Index row() const { - return IsRowMajor ? m_outer : index(); - } - - inline Index col() const { - return IsRowMajor ? 
index() : m_outer; - } - - inline size_t size() const { - return m_matrix.m_data.lowerProfile(m_outer); - } - - inline operator bool() const { - return (m_id < m_end) && (m_id >= m_start); - } - -protected: - const SkylineMatrix& m_matrix; - const Index m_outer; - Index m_id; - const Index m_start; - const Index m_end; -}; - -} // end namespace Eigen - -#endif // EIGEN_SkylineMatrix_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h b/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h deleted file mode 100644 index b3a2372..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h +++ /dev/null @@ -1,212 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SKYLINEMATRIXBASE_H -#define EIGEN_SKYLINEMATRIXBASE_H - -#include "SkylineUtil.h" - -namespace Eigen { - -/** \ingroup Skyline_Module - * - * \class SkylineMatrixBase - * - * \brief Base class of any skyline matrices or skyline expressions - * - * \param Derived - * - */ -template<typename Derived> class SkylineMatrixBase : public EigenBase<Derived> { -public: - - typedef typename internal::traits<Derived>::Scalar Scalar; - typedef typename internal::traits<Derived>::StorageKind StorageKind; - typedef typename internal::index<StorageKind>::type Index; - - enum { - RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime, - /**< The number of rows at compile-time. This is just a copy of the value provided - * by the \a Derived type. If a value is not known at compile-time, - * it is set to the \a Dynamic constant. - * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ - - ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime, - /**< The number of columns at compile-time. This is just a copy of the value provided - * by the \a Derived type. If a value is not known at compile-time, - * it is set to the \a Dynamic constant. - * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ - - - SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime, - internal::traits<Derived>::ColsAtCompileTime>::ret), - /**< This is equal to the number of coefficients, i.e. the number of - * rows times the number of columns, or to \a Dynamic if this is not - * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ - - MaxRowsAtCompileTime = RowsAtCompileTime, - MaxColsAtCompileTime = ColsAtCompileTime, - - MaxSizeAtCompileTime = (internal::size_at_compile_time<MaxRowsAtCompileTime, - MaxColsAtCompileTime>::ret), - - IsVectorAtCompileTime = RowsAtCompileTime == 1 || ColsAtCompileTime == 1, - /**< This is set to true if either the number of rows or the number of - * columns is known at compile-time to be equal to 1. Indeed, in that case, - * we are dealing with a column-vector (if there is only one column) or with - * a row-vector (if there is only one row). */ - - Flags = internal::traits<Derived>::Flags, - /**< This stores expression \ref flags flags which may or may not be inherited by new expressions - * constructed from this one. See the \ref flags "list of flags". 
- */ - - CoeffReadCost = internal::traits<Derived>::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. - */ - - IsRowMajor = Flags & RowMajorBit ? 1 : 0 - }; - -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** This is the "real scalar" type; if the \a Scalar type is already real numbers - * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If - * \a Scalar is \a std::complex<T> then RealScalar is \a T. - * - * \sa class NumTraits - */ - typedef typename NumTraits<Scalar>::Real RealScalar; - - /** type of the equivalent square matrix */ - typedef Matrix<Scalar, EIGEN_SIZE_MAX(RowsAtCompileTime, ColsAtCompileTime), - EIGEN_SIZE_MAX(RowsAtCompileTime, ColsAtCompileTime) > SquareMatrixType; - - inline const Derived& derived() const { - return *static_cast<const Derived*> (this); - } - - inline Derived& derived() { - return *static_cast<Derived*> (this); - } - - inline Derived& const_cast_derived() const { - return *static_cast<Derived*> (const_cast<SkylineMatrixBase*> (this)); - } -#endif // not EIGEN_PARSED_BY_DOXYGEN - - /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ - inline Index rows() const { - return derived().rows(); - } - - /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ - inline Index cols() const { - return derived().cols(); - } - - /** \returns the number of coefficients, which is \a rows()*cols(). - * \sa rows(), cols(), SizeAtCompileTime. */ - inline Index size() const { - return rows() * cols(); - } - - /** \returns the number of nonzero coefficients which is in practice the number - * of stored coefficients. */ - inline Index nonZeros() const { - return derived().nonZeros(); - } - - /** \returns the size of the storage major dimension, - * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */ - Index outerSize() const { - return (int(Flags) & RowMajorBit) ? this->rows() : this->cols(); - } - - /** \returns the size of the inner dimension according to the storage order, - * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ - Index innerSize() const { - return (int(Flags) & RowMajorBit) ? 
this->cols() : this->rows(); - } - - bool isRValue() const { - return m_isRValue; - } - - Derived& markAsRValue() { - m_isRValue = true; - return derived(); - } - - SkylineMatrixBase() : m_isRValue(false) { - /* TODO check flags */ - } - - inline Derived & operator=(const Derived& other) { - this->operator=<Derived > (other); - return derived(); - } - - template<typename OtherDerived> - inline void assignGeneric(const OtherDerived& other) { - derived().resize(other.rows(), other.cols()); - for (Index row = 0; row < rows(); row++) - for (Index col = 0; col < cols(); col++) { - if (other.coeff(row, col) != Scalar(0)) - derived().insert(row, col) = other.coeff(row, col); - } - derived().finalize(); - } - - template<typename OtherDerived> - inline Derived & operator=(const SkylineMatrixBase<OtherDerived>& other) { - //TODO - } - - template<typename Lhs, typename Rhs> - inline Derived & operator=(const SkylineProduct<Lhs, Rhs, SkylineTimeSkylineProduct>& product); - - friend std::ostream & operator <<(std::ostream & s, const SkylineMatrixBase& m) { - s << m.derived(); - return s; - } - - template<typename OtherDerived> - const typename SkylineProductReturnType<Derived, OtherDerived>::Type - operator*(const MatrixBase<OtherDerived> &other) const; - - /** \internal use operator= */ - template<typename DenseDerived> - void evalTo(MatrixBase<DenseDerived>& dst) const { - dst.setZero(); - for (Index i = 0; i < rows(); i++) - for (Index j = 0; j < rows(); j++) - dst(i, j) = derived().coeff(i, j); - } - - Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime> toDense() const { - return derived(); - } - - /** \returns the matrix or vector obtained by evaluating this expression. - * - * Notice that in the case of a plain matrix or vector (not an expression) this function just returns - * a const reference, in order to avoid a useless copy. - */ - EIGEN_STRONG_INLINE const typename internal::eval<Derived, IsSkyline>::type eval() const { - return typename internal::eval<Derived>::type(derived()); - } - -protected: - bool m_isRValue; -}; - -} // end namespace Eigen - -#endif // EIGEN_SkylineMatrixBase_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h b/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h deleted file mode 100644 index d9eb814..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h +++ /dev/null @@ -1,295 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
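The insert()/finalize() protocol documented above (and the generic assignGeneric() loop) is how a skyline matrix is meant to be filled. A minimal usage sketch follows; it assumes the unsupported Skyline module still compiles and that <unsupported/Eigen/Skyline> is its umbrella header (both are assumptions, since only the deleted sources are shown here), and it exercises only the insert, finalize and stream-output paths visible in SkylineMatrix above.

    #include <unsupported/Eigen/Skyline>  // assumed umbrella header for the Skyline module
    #include <iostream>

    int main()
    {
      // 3x3 skyline matrix; resize() asserts that skyline matrices are square.
      Eigen::SkylineMatrix<double> A(3, 3);

      // Insertion session: diagonal entries plus one upper and one lower entry.
      A.insert(0, 0) = 2.0;
      A.insert(1, 1) = 2.0;
      A.insert(2, 2) = 2.0;
      A.insert(0, 1) = -1.0;  // upper triangular part
      A.insert(2, 1) = -1.0;  // lower triangular part
      A.finalize();           // must follow an insertion session (see insert() above)

      std::cout << A << std::endl;  // uses the friend operator<< of SkylineMatrix
      return 0;
    }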
- -#ifndef EIGEN_SKYLINEPRODUCT_H -#define EIGEN_SKYLINEPRODUCT_H - -namespace Eigen { - -template<typename Lhs, typename Rhs, int ProductMode> -struct SkylineProductReturnType { - typedef const typename internal::nested_eval<Lhs, Rhs::RowsAtCompileTime>::type LhsNested; - typedef const typename internal::nested_eval<Rhs, Lhs::RowsAtCompileTime>::type RhsNested; - - typedef SkylineProduct<LhsNested, RhsNested, ProductMode> Type; -}; - -template<typename LhsNested, typename RhsNested, int ProductMode> -struct internal::traits<SkylineProduct<LhsNested, RhsNested, ProductMode> > { - // clean the nested types: - typedef typename internal::remove_all<LhsNested>::type _LhsNested; - typedef typename internal::remove_all<RhsNested>::type _RhsNested; - typedef typename _LhsNested::Scalar Scalar; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit), - ResultIsSkyline = ProductMode == SkylineTimeSkylineProduct, - - RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit) | (ResultIsSkyline ? 0 : SkylineBit)), - - Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) - | EvalBeforeAssigningBit - | EvalBeforeNestingBit, - - CoeffReadCost = HugeCost - }; - - typedef typename internal::conditional<ResultIsSkyline, - SkylineMatrixBase<SkylineProduct<LhsNested, RhsNested, ProductMode> >, - MatrixBase<SkylineProduct<LhsNested, RhsNested, ProductMode> > >::type Base; -}; - -namespace internal { -template<typename LhsNested, typename RhsNested, int ProductMode> -class SkylineProduct : no_assignment_operator, -public traits<SkylineProduct<LhsNested, RhsNested, ProductMode> >::Base { -public: - - EIGEN_GENERIC_PUBLIC_INTERFACE(SkylineProduct) - -private: - - typedef typename traits<SkylineProduct>::_LhsNested _LhsNested; - typedef typename traits<SkylineProduct>::_RhsNested _RhsNested; - -public: - - template<typename Lhs, typename Rhs> - EIGEN_STRONG_INLINE SkylineProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) { - eigen_assert(lhs.cols() == rhs.rows()); - - enum { - ProductIsValid = _LhsNested::ColsAtCompileTime == Dynamic - || _RhsNested::RowsAtCompileTime == Dynamic - || int(_LhsNested::ColsAtCompileTime) == int(_RhsNested::RowsAtCompileTime), - AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime, - SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested, _RhsNested) - }; - // note to the lost user: - // * for a dot product use: v1.dot(v2) - // * for a coeff-wise product use: v1.cwise()*v2 - EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) - EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) - EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - } - - EIGEN_STRONG_INLINE Index rows() const { - return m_lhs.rows(); - } - - EIGEN_STRONG_INLINE Index cols() const { - return m_rhs.cols(); - } - - 
EIGEN_STRONG_INLINE const _LhsNested& lhs() const { - return m_lhs; - } - - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { - return m_rhs; - } - -protected: - LhsNested m_lhs; - RhsNested m_rhs; -}; - -// dense = skyline * dense -// Note that here we force no inlining and separate the setZero() because GCC messes up otherwise - -template<typename Lhs, typename Rhs, typename Dest> -EIGEN_DONT_INLINE void skyline_row_major_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { - typedef typename remove_all<Lhs>::type _Lhs; - typedef typename remove_all<Rhs>::type _Rhs; - typedef typename traits<Lhs>::Scalar Scalar; - - enum { - LhsIsRowMajor = (_Lhs::Flags & RowMajorBit) == RowMajorBit, - LhsIsSelfAdjoint = (_Lhs::Flags & SelfAdjointBit) == SelfAdjointBit, - ProcessFirstHalf = LhsIsSelfAdjoint - && (((_Lhs::Flags & (UpperTriangularBit | LowerTriangularBit)) == 0) - || ((_Lhs::Flags & UpperTriangularBit) && !LhsIsRowMajor) - || ((_Lhs::Flags & LowerTriangularBit) && LhsIsRowMajor)), - ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf) - }; - - //Use matrix diagonal part <- Improvement : use inner iterator on dense matrix. - for (Index col = 0; col < rhs.cols(); col++) { - for (Index row = 0; row < lhs.rows(); row++) { - dst(row, col) = lhs.coeffDiag(row) * rhs(row, col); - } - } - //Use matrix lower triangular part - for (Index row = 0; row < lhs.rows(); row++) { - typename _Lhs::InnerLowerIterator lIt(lhs, row); - const Index stop = lIt.col() + lIt.size(); - for (Index col = 0; col < rhs.cols(); col++) { - - Index k = lIt.col(); - Scalar tmp = 0; - while (k < stop) { - tmp += - lIt.value() * - rhs(k++, col); - ++lIt; - } - dst(row, col) += tmp; - lIt += -lIt.size(); - } - - } - - //Use matrix upper triangular part - for (Index lhscol = 0; lhscol < lhs.cols(); lhscol++) { - typename _Lhs::InnerUpperIterator uIt(lhs, lhscol); - const Index stop = uIt.size() + uIt.row(); - for (Index rhscol = 0; rhscol < rhs.cols(); rhscol++) { - - - const Scalar rhsCoeff = rhs.coeff(lhscol, rhscol); - Index k = uIt.row(); - while (k < stop) { - dst(k++, rhscol) += - uIt.value() * - rhsCoeff; - ++uIt; - } - uIt += -uIt.size(); - } - } - -} - -template<typename Lhs, typename Rhs, typename Dest> -EIGEN_DONT_INLINE void skyline_col_major_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst) { - typedef typename remove_all<Lhs>::type _Lhs; - typedef typename remove_all<Rhs>::type _Rhs; - typedef typename traits<Lhs>::Scalar Scalar; - - enum { - LhsIsRowMajor = (_Lhs::Flags & RowMajorBit) == RowMajorBit, - LhsIsSelfAdjoint = (_Lhs::Flags & SelfAdjointBit) == SelfAdjointBit, - ProcessFirstHalf = LhsIsSelfAdjoint - && (((_Lhs::Flags & (UpperTriangularBit | LowerTriangularBit)) == 0) - || ((_Lhs::Flags & UpperTriangularBit) && !LhsIsRowMajor) - || ((_Lhs::Flags & LowerTriangularBit) && LhsIsRowMajor)), - ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf) - }; - - //Use matrix diagonal part <- Improvement : use inner iterator on dense matrix. 
- for (Index col = 0; col < rhs.cols(); col++) { - for (Index row = 0; row < lhs.rows(); row++) { - dst(row, col) = lhs.coeffDiag(row) * rhs(row, col); - } - } - - //Use matrix upper triangular part - for (Index row = 0; row < lhs.rows(); row++) { - typename _Lhs::InnerUpperIterator uIt(lhs, row); - const Index stop = uIt.col() + uIt.size(); - for (Index col = 0; col < rhs.cols(); col++) { - - Index k = uIt.col(); - Scalar tmp = 0; - while (k < stop) { - tmp += - uIt.value() * - rhs(k++, col); - ++uIt; - } - - - dst(row, col) += tmp; - uIt += -uIt.size(); - } - } - - //Use matrix lower triangular part - for (Index lhscol = 0; lhscol < lhs.cols(); lhscol++) { - typename _Lhs::InnerLowerIterator lIt(lhs, lhscol); - const Index stop = lIt.size() + lIt.row(); - for (Index rhscol = 0; rhscol < rhs.cols(); rhscol++) { - - const Scalar rhsCoeff = rhs.coeff(lhscol, rhscol); - Index k = lIt.row(); - while (k < stop) { - dst(k++, rhscol) += - lIt.value() * - rhsCoeff; - ++lIt; - } - lIt += -lIt.size(); - } - } - -} - -template<typename Lhs, typename Rhs, typename ResultType, - int LhsStorageOrder = traits<Lhs>::Flags&RowMajorBit> - struct skyline_product_selector; - -template<typename Lhs, typename Rhs, typename ResultType> -struct skyline_product_selector<Lhs, Rhs, ResultType, RowMajor> { - typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar; - - static void run(const Lhs& lhs, const Rhs& rhs, ResultType & res) { - skyline_row_major_time_dense_product<Lhs, Rhs, ResultType > (lhs, rhs, res); - } -}; - -template<typename Lhs, typename Rhs, typename ResultType> -struct skyline_product_selector<Lhs, Rhs, ResultType, ColMajor> { - typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar; - - static void run(const Lhs& lhs, const Rhs& rhs, ResultType & res) { - skyline_col_major_time_dense_product<Lhs, Rhs, ResultType > (lhs, rhs, res); - } -}; - -} // end namespace internal - -// template<typename Derived> -// template<typename Lhs, typename Rhs > -// Derived & MatrixBase<Derived>::lazyAssign(const SkylineProduct<Lhs, Rhs, SkylineTimeDenseProduct>& product) { -// typedef typename internal::remove_all<Lhs>::type _Lhs; -// internal::skyline_product_selector<typename internal::remove_all<Lhs>::type, -// typename internal::remove_all<Rhs>::type, -// Derived>::run(product.lhs(), product.rhs(), derived()); -// -// return derived(); -// } - -// skyline * dense - -template<typename Derived> -template<typename OtherDerived > -EIGEN_STRONG_INLINE const typename SkylineProductReturnType<Derived, OtherDerived>::Type -SkylineMatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const { - - return typename SkylineProductReturnType<Derived, OtherDerived>::Type(derived(), other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_SKYLINEPRODUCT_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h b/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h deleted file mode 100644 index 378a8de..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h +++ /dev/null @@ -1,259 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
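The two kernels above walk the same three-part layout that the SkylineStorage class below manages: a dense diagonal plus ragged upper and lower bands whose per-row and per-column lengths are the profiles. The following plain C++ sketch is not the library's code (every name in it is invented for illustration); it shows that layout and the three-pass matrix-vector product on a small tridiagonal example.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
      const std::size_t n = 3;
      // Encodes A = [ 2 -1  0 ]
      //             [-1  2 -1 ]
      //             [ 0 -1  2 ]
      std::vector<double> diag = {2, 2, 2};
      // lowerProfile[i] values stored for row i, covering columns i-p .. i-1
      std::vector<std::size_t> lowerProfile = {0, 1, 1};
      std::vector<std::vector<double> > lower = {{}, {-1}, {-1}};
      // upperProfile[j] values stored for column j, covering rows j-q .. j-1
      std::vector<std::size_t> upperProfile = {0, 1, 1};
      std::vector<std::vector<double> > upper = {{}, {-1}, {-1}};

      std::vector<double> x = {1, 2, 3}, y(n, 0.0);

      // Pass 1: diagonal contribution
      for (std::size_t i = 0; i < n; ++i) y[i] = diag[i] * x[i];
      // Pass 2: lower triangle, row by row
      for (std::size_t i = 0; i < n; ++i)
        for (std::size_t k = 0; k < lowerProfile[i]; ++k)
          y[i] += lower[i][k] * x[i - lowerProfile[i] + k];
      // Pass 3: upper triangle, column by column
      for (std::size_t j = 0; j < n; ++j)
        for (std::size_t k = 0; k < upperProfile[j]; ++k)
          y[j - upperProfile[j] + k] += upper[j][k] * x[j];

      for (double v : y) std::cout << v << " ";  // prints: 0 0 4
      std::cout << std::endl;
      return 0;
    }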
- -#ifndef EIGEN_SKYLINE_STORAGE_H -#define EIGEN_SKYLINE_STORAGE_H - -namespace Eigen { - -/** Stores a skyline set of values in three structures : - * The diagonal elements - * The upper elements - * The lower elements - * - */ -template<typename Scalar> -class SkylineStorage { - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef SparseIndex Index; -public: - - SkylineStorage() - : m_diag(0), - m_lower(0), - m_upper(0), - m_lowerProfile(0), - m_upperProfile(0), - m_diagSize(0), - m_upperSize(0), - m_lowerSize(0), - m_upperProfileSize(0), - m_lowerProfileSize(0), - m_allocatedSize(0) { - } - - SkylineStorage(const SkylineStorage& other) - : m_diag(0), - m_lower(0), - m_upper(0), - m_lowerProfile(0), - m_upperProfile(0), - m_diagSize(0), - m_upperSize(0), - m_lowerSize(0), - m_upperProfileSize(0), - m_lowerProfileSize(0), - m_allocatedSize(0) { - *this = other; - } - - SkylineStorage & operator=(const SkylineStorage& other) { - resize(other.diagSize(), other.m_upperProfileSize, other.m_lowerProfileSize, other.upperSize(), other.lowerSize()); - memcpy(m_diag, other.m_diag, m_diagSize * sizeof (Scalar)); - memcpy(m_upper, other.m_upper, other.upperSize() * sizeof (Scalar)); - memcpy(m_lower, other.m_lower, other.lowerSize() * sizeof (Scalar)); - memcpy(m_upperProfile, other.m_upperProfile, m_upperProfileSize * sizeof (Index)); - memcpy(m_lowerProfile, other.m_lowerProfile, m_lowerProfileSize * sizeof (Index)); - return *this; - } - - void swap(SkylineStorage& other) { - std::swap(m_diag, other.m_diag); - std::swap(m_upper, other.m_upper); - std::swap(m_lower, other.m_lower); - std::swap(m_upperProfile, other.m_upperProfile); - std::swap(m_lowerProfile, other.m_lowerProfile); - std::swap(m_diagSize, other.m_diagSize); - std::swap(m_upperSize, other.m_upperSize); - std::swap(m_lowerSize, other.m_lowerSize); - std::swap(m_allocatedSize, other.m_allocatedSize); - } - - ~SkylineStorage() { - delete[] m_diag; - delete[] m_upper; - if (m_upper != m_lower) - delete[] m_lower; - delete[] m_upperProfile; - delete[] m_lowerProfile; - } - - void reserve(Index size, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) { - Index newAllocatedSize = size + upperSize + lowerSize; - if (newAllocatedSize > m_allocatedSize) - reallocate(size, upperProfileSize, lowerProfileSize, upperSize, lowerSize); - } - - void squeeze() { - if (m_allocatedSize > m_diagSize + m_upperSize + m_lowerSize) - reallocate(m_diagSize, m_upperProfileSize, m_lowerProfileSize, m_upperSize, m_lowerSize); - } - - void resize(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize, float reserveSizeFactor = 0) { - if (m_allocatedSize < diagSize + upperSize + lowerSize) - reallocate(diagSize, upperProfileSize, lowerProfileSize, upperSize + Index(reserveSizeFactor * upperSize), lowerSize + Index(reserveSizeFactor * lowerSize)); - m_diagSize = diagSize; - m_upperSize = upperSize; - m_lowerSize = lowerSize; - m_upperProfileSize = upperProfileSize; - m_lowerProfileSize = lowerProfileSize; - } - - inline Index diagSize() const { - return m_diagSize; - } - - inline Index upperSize() const { - return m_upperSize; - } - - inline Index lowerSize() const { - return m_lowerSize; - } - - inline Index upperProfileSize() const { - return m_upperProfileSize; - } - - inline Index lowerProfileSize() const { - return m_lowerProfileSize; - } - - inline Index allocatedSize() const { - return m_allocatedSize; - } - - inline void clear() { - m_diagSize = 0; - } - - inline Scalar& 
diag(Index i) { - return m_diag[i]; - } - - inline const Scalar& diag(Index i) const { - return m_diag[i]; - } - - inline Scalar& upper(Index i) { - return m_upper[i]; - } - - inline const Scalar& upper(Index i) const { - return m_upper[i]; - } - - inline Scalar& lower(Index i) { - return m_lower[i]; - } - - inline const Scalar& lower(Index i) const { - return m_lower[i]; - } - - inline Index& upperProfile(Index i) { - return m_upperProfile[i]; - } - - inline const Index& upperProfile(Index i) const { - return m_upperProfile[i]; - } - - inline Index& lowerProfile(Index i) { - return m_lowerProfile[i]; - } - - inline const Index& lowerProfile(Index i) const { - return m_lowerProfile[i]; - } - - static SkylineStorage Map(Index* upperProfile, Index* lowerProfile, Scalar* diag, Scalar* upper, Scalar* lower, Index size, Index upperSize, Index lowerSize) { - SkylineStorage res; - res.m_upperProfile = upperProfile; - res.m_lowerProfile = lowerProfile; - res.m_diag = diag; - res.m_upper = upper; - res.m_lower = lower; - res.m_allocatedSize = res.m_diagSize = size; - res.m_upperSize = upperSize; - res.m_lowerSize = lowerSize; - return res; - } - - inline void reset() { - memset(m_diag, 0, m_diagSize * sizeof (Scalar)); - memset(m_upper, 0, m_upperSize * sizeof (Scalar)); - memset(m_lower, 0, m_lowerSize * sizeof (Scalar)); - memset(m_upperProfile, 0, m_diagSize * sizeof (Index)); - memset(m_lowerProfile, 0, m_diagSize * sizeof (Index)); - } - - void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar>()) { - //TODO - } - -protected: - - inline void reallocate(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) { - - Scalar* diag = new Scalar[diagSize]; - Scalar* upper = new Scalar[upperSize]; - Scalar* lower = new Scalar[lowerSize]; - Index* upperProfile = new Index[upperProfileSize]; - Index* lowerProfile = new Index[lowerProfileSize]; - - Index copyDiagSize = (std::min)(diagSize, m_diagSize); - Index copyUpperSize = (std::min)(upperSize, m_upperSize); - Index copyLowerSize = (std::min)(lowerSize, m_lowerSize); - Index copyUpperProfileSize = (std::min)(upperProfileSize, m_upperProfileSize); - Index copyLowerProfileSize = (std::min)(lowerProfileSize, m_lowerProfileSize); - - // copy - memcpy(diag, m_diag, copyDiagSize * sizeof (Scalar)); - memcpy(upper, m_upper, copyUpperSize * sizeof (Scalar)); - memcpy(lower, m_lower, copyLowerSize * sizeof (Scalar)); - memcpy(upperProfile, m_upperProfile, copyUpperProfileSize * sizeof (Index)); - memcpy(lowerProfile, m_lowerProfile, copyLowerProfileSize * sizeof (Index)); - - - - // delete old stuff - delete[] m_diag; - delete[] m_upper; - delete[] m_lower; - delete[] m_upperProfile; - delete[] m_lowerProfile; - m_diag = diag; - m_upper = upper; - m_lower = lower; - m_upperProfile = upperProfile; - m_lowerProfile = lowerProfile; - m_allocatedSize = diagSize + upperSize + lowerSize; - m_upperSize = upperSize; - m_lowerSize = lowerSize; - } - -public: - Scalar* m_diag; - Scalar* m_upper; - Scalar* m_lower; - Index* m_upperProfile; - Index* m_lowerProfile; - Index m_diagSize; - Index m_upperSize; - Index m_lowerSize; - Index m_upperProfileSize; - Index m_lowerProfileSize; - Index m_allocatedSize; - -}; - -} // end namespace Eigen - -#endif // EIGEN_COMPRESSED_STORAGE_H diff --git a/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h b/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h deleted file mode 100644 index 75eb612..0000000 --- a/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h +++ /dev/null @@ 
-1,89 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Guillaume Saupin <guillaume.saupin@cea.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SKYLINEUTIL_H -#define EIGEN_SKYLINEUTIL_H - -namespace Eigen { - -#ifdef NDEBUG -#define EIGEN_DBG_SKYLINE(X) -#else -#define EIGEN_DBG_SKYLINE(X) X -#endif - -const unsigned int SkylineBit = 0x1200; -template<typename Lhs, typename Rhs, int ProductMode> class SkylineProduct; -enum AdditionalProductEvaluationMode {SkylineTimeDenseProduct, SkylineTimeSkylineProduct, DenseTimeSkylineProduct}; -enum {IsSkyline = SkylineBit}; - - -#define EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ -template<typename OtherDerived> \ -EIGEN_STRONG_INLINE Derived& operator Op(const Eigen::SkylineMatrixBase<OtherDerived>& other) \ -{ \ - return Base::operator Op(other.derived()); \ -} \ -EIGEN_STRONG_INLINE Derived& operator Op(const Derived& other) \ -{ \ - return Base::operator Op(other); \ -} - -#define EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, Op) \ -template<typename Other> \ -EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ -{ \ - return Base::operator Op(scalar); \ -} - -#define EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ - EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \ - EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \ - EIGEN_SKYLINE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \ - EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \ - EIGEN_SKYLINE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=) - -#define _EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived, BaseClass) \ - typedef BaseClass Base; \ - typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; \ - typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; \ - typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \ - typedef typename Eigen::internal::index<StorageKind>::type Index; \ - enum { Flags = Eigen::internal::traits<Derived>::Flags, }; - -#define EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived) \ - _EIGEN_SKYLINE_GENERIC_PUBLIC_INTERFACE(Derived, Eigen::SkylineMatrixBase<Derived>) - -template<typename Derived> class SkylineMatrixBase; -template<typename _Scalar, int _Flags = 0> class SkylineMatrix; -template<typename _Scalar, int _Flags = 0> class DynamicSkylineMatrix; -template<typename _Scalar, int _Flags = 0> class SkylineVector; -template<typename _Scalar, int _Flags = 0> class MappedSkylineMatrix; - -namespace internal { - -template<typename Lhs, typename Rhs> struct skyline_product_mode; -template<typename Lhs, typename Rhs, int ProductMode = skyline_product_mode<Lhs,Rhs>::value> struct SkylineProductReturnType; - -template<typename T> class eval<T,IsSkyline> -{ - typedef typename traits<T>::Scalar _Scalar; - enum { - _Flags = traits<T>::Flags - }; - - public: - typedef SkylineMatrix<_Scalar, _Flags> type; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SKYLINEUTIL_H diff --git a/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h b/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h deleted file mode 100644 index e9ec746..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h +++ /dev/null @@ -1,122 +0,0 @@ -// This file is part of 
Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H -#define EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H - -namespace Eigen { - -#if 0 - -// NOTE Have to be reimplemented as a specialization of BlockImpl< DynamicSparseMatrix<_Scalar, _Options, _Index>, ... > -// See SparseBlock.h for an example - - -/*************************************************************************** -* specialisation for DynamicSparseMatrix -***************************************************************************/ - -template<typename _Scalar, int _Options, typename _Index, int Size> -class SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options, _Index>, Size> - : public SparseMatrixBase<SparseInnerVectorSet<DynamicSparseMatrix<_Scalar, _Options, _Index>, Size> > -{ - typedef DynamicSparseMatrix<_Scalar, _Options, _Index> MatrixType; - public: - - enum { IsRowMajor = internal::traits<SparseInnerVectorSet>::IsRowMajor }; - - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseInnerVectorSet) - class InnerIterator: public MatrixType::InnerIterator - { - public: - inline InnerIterator(const SparseInnerVectorSet& xpr, Index outer) - : MatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - - inline SparseInnerVectorSet(const MatrixType& matrix, Index outerStart, Index outerSize) - : m_matrix(matrix), m_outerStart(outerStart), m_outerSize(outerSize) - { - eigen_assert( (outerStart>=0) && ((outerStart+outerSize)<=matrix.outerSize()) ); - } - - inline SparseInnerVectorSet(const MatrixType& matrix, Index outer) - : m_matrix(matrix), m_outerStart(outer), m_outerSize(Size) - { - eigen_assert(Size!=Dynamic); - eigen_assert( (outer>=0) && (outer<matrix.outerSize()) ); - } - - template<typename OtherDerived> - inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other) - { - if (IsRowMajor != ((OtherDerived::Flags&RowMajorBit)==RowMajorBit)) - { - // need to transpose => perform a block evaluation followed by a big swap - DynamicSparseMatrix<Scalar,IsRowMajor?RowMajorBit:0> aux(other); - *this = aux.markAsRValue(); - } - else - { - // evaluate/copy vector per vector - for (Index j=0; j<m_outerSize.value(); ++j) - { - SparseVector<Scalar,IsRowMajor ? 
RowMajorBit : 0> aux(other.innerVector(j)); - m_matrix.const_cast_derived()._data()[m_outerStart+j].swap(aux._data()); - } - } - return *this; - } - - inline SparseInnerVectorSet& operator=(const SparseInnerVectorSet& other) - { - return operator=<SparseInnerVectorSet>(other); - } - - Index nonZeros() const - { - Index count = 0; - for (Index j=0; j<m_outerSize.value(); ++j) - count += m_matrix._data()[m_outerStart+j].size(); - return count; - } - - const Scalar& lastCoeff() const - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(SparseInnerVectorSet); - eigen_assert(m_matrix.data()[m_outerStart].size()>0); - return m_matrix.data()[m_outerStart].vale(m_matrix.data()[m_outerStart].size()-1); - } - -// template<typename Sparse> -// inline SparseInnerVectorSet& operator=(const SparseMatrixBase<OtherDerived>& other) -// { -// return *this; -// } - - EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } - - protected: - - const typename MatrixType::Nested m_matrix; - Index m_outerStart; - const internal::variable_if_dynamic<Index, Size> m_outerSize; - -}; - -#endif - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_BLOCKFORDYNAMICMATRIX_H diff --git a/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h b/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h deleted file mode 100644 index 536a0c3..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h +++ /dev/null @@ -1,1079 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Desire Nuentsa <desire.nuentsa_wakam@inria.fr> -// Copyright (C) 2013 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSEBLOCKMATRIX_H -#define EIGEN_SPARSEBLOCKMATRIX_H - -namespace Eigen { -/** \ingroup SparseCore_Module - * - * \class BlockSparseMatrix - * - * \brief A versatile sparse matrix representation where each element is a block - * - * This class provides routines to manipulate block sparse matrices stored in a - * BSR-like representation. There are two main types : - * - * 1. All blocks have the same number of rows and columns, called block size - * in the following. In this case, if this block size is known at compile time, - * it can be given as a template parameter like - * \code - * BlockSparseMatrix<Scalar, 3, ColMajor> bmat(b_rows, b_cols); - * \endcode - * Here, bmat is a b_rows x b_cols block sparse matrix - * where each coefficient is a 3x3 dense matrix. - * If the block size is fixed but will be given at runtime, - * \code - * BlockSparseMatrix<Scalar, Dynamic, ColMajor> bmat(b_rows, b_cols); - * bmat.setBlockSize(block_size); - * \endcode - * - * 2. The second case is for variable-block sparse matrices. - * Here each block has its own dimensions. The only restriction is that all the blocks - * in a row (resp. a column) should have the same number of rows (resp. of columns). - * It is thus required in this case to describe the layout of the matrix by calling - * setBlockLayout(rowBlocks, colBlocks). - * - * In any of the previous case, the matrix can be filled by calling setFromTriplets(). - * A regular sparse matrix can be converted to a block sparse matrix and vice versa. 
- * It is obviously required to describe the block layout beforehand by calling either - * setBlockSize() for fixed-size blocks or setBlockLayout for variable-size blocks. - * - * \tparam _Scalar The Scalar type - * \tparam _BlockAtCompileTime The block layout option. It takes the following values - * Dynamic : block size known at runtime - * a numeric number : fixed-size block known at compile time - */ -template<typename _Scalar, int _BlockAtCompileTime=Dynamic, int _Options=ColMajor, typename _StorageIndex=int> class BlockSparseMatrix; - -template<typename BlockSparseMatrixT> class BlockSparseMatrixView; - -namespace internal { -template<typename _Scalar, int _BlockAtCompileTime, int _Options, typename _Index> -struct traits<BlockSparseMatrix<_Scalar,_BlockAtCompileTime,_Options, _Index> > -{ - typedef _Scalar Scalar; - typedef _Index Index; - typedef Sparse StorageKind; // FIXME Where is it used ?? - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - BlockSize = _BlockAtCompileTime, - Flags = _Options | NestByRefBit | LvalueBit, - CoeffReadCost = NumTraits<Scalar>::ReadCost, - SupportedAccessPatterns = InnerRandomAccessPattern - }; -}; -template<typename BlockSparseMatrixT> -struct traits<BlockSparseMatrixView<BlockSparseMatrixT> > -{ - typedef Ref<Matrix<typename BlockSparseMatrixT::Scalar, BlockSparseMatrixT::BlockSize, BlockSparseMatrixT::BlockSize> > Scalar; - typedef Ref<Matrix<typename BlockSparseMatrixT::RealScalar, BlockSparseMatrixT::BlockSize, BlockSparseMatrixT::BlockSize> > RealScalar; - -}; - -// Function object to sort a triplet list -template<typename Iterator, bool IsColMajor> -struct TripletComp -{ - typedef typename Iterator::value_type Triplet; - bool operator()(const Triplet& a, const Triplet& b) - { if(IsColMajor) - return ((a.col() == b.col() && a.row() < b.row()) || (a.col() < b.col())); - else - return ((a.row() == b.row() && a.col() < b.col()) || (a.row() < b.row())); - } -}; -} // end namespace internal - - -/* Proxy to view the block sparse matrix as a regular sparse matrix */ -template<typename BlockSparseMatrixT> -class BlockSparseMatrixView : public SparseMatrixBase<BlockSparseMatrixT> -{ - public: - typedef Ref<typename BlockSparseMatrixT::BlockScalar> Scalar; - typedef Ref<typename BlockSparseMatrixT::BlockRealScalar> RealScalar; - typedef typename BlockSparseMatrixT::Index Index; - typedef BlockSparseMatrixT Nested; - enum { - Flags = BlockSparseMatrixT::Options, - Options = BlockSparseMatrixT::Options, - RowsAtCompileTime = BlockSparseMatrixT::RowsAtCompileTime, - ColsAtCompileTime = BlockSparseMatrixT::ColsAtCompileTime, - MaxColsAtCompileTime = BlockSparseMatrixT::MaxColsAtCompileTime, - MaxRowsAtCompileTime = BlockSparseMatrixT::MaxRowsAtCompileTime - }; - public: - BlockSparseMatrixView(const BlockSparseMatrixT& spblockmat) - : m_spblockmat(spblockmat) - {} - - Index outerSize() const - { - return (Flags&RowMajorBit) == 1 ? 
this->rows() : this->cols(); - } - Index cols() const - { - return m_spblockmat.blockCols(); - } - Index rows() const - { - return m_spblockmat.blockRows(); - } - Scalar coeff(Index row, Index col) - { - return m_spblockmat.coeff(row, col); - } - Scalar coeffRef(Index row, Index col) - { - return m_spblockmat.coeffRef(row, col); - } - // Wrapper to iterate over all blocks - class InnerIterator : public BlockSparseMatrixT::BlockInnerIterator - { - public: - InnerIterator(const BlockSparseMatrixView& mat, Index outer) - : BlockSparseMatrixT::BlockInnerIterator(mat.m_spblockmat, outer) - {} - - }; - - protected: - const BlockSparseMatrixT& m_spblockmat; -}; - -// Proxy to view a regular vector as a block vector -template<typename BlockSparseMatrixT, typename VectorType> -class BlockVectorView -{ - public: - enum { - BlockSize = BlockSparseMatrixT::BlockSize, - ColsAtCompileTime = VectorType::ColsAtCompileTime, - RowsAtCompileTime = VectorType::RowsAtCompileTime, - Flags = VectorType::Flags - }; - typedef Ref<const Matrix<typename BlockSparseMatrixT::Scalar, (RowsAtCompileTime==1)? 1 : BlockSize, (ColsAtCompileTime==1)? 1 : BlockSize> >Scalar; - typedef typename BlockSparseMatrixT::Index Index; - public: - BlockVectorView(const BlockSparseMatrixT& spblockmat, const VectorType& vec) - : m_spblockmat(spblockmat),m_vec(vec) - { } - inline Index cols() const - { - return m_vec.cols(); - } - inline Index size() const - { - return m_spblockmat.blockRows(); - } - inline Scalar coeff(Index bi) const - { - Index startRow = m_spblockmat.blockRowsIndex(bi); - Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; - return m_vec.middleRows(startRow, rowSize); - } - inline Scalar coeff(Index bi, Index j) const - { - Index startRow = m_spblockmat.blockRowsIndex(bi); - Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; - return m_vec.block(startRow, j, rowSize, 1); - } - protected: - const BlockSparseMatrixT& m_spblockmat; - const VectorType& m_vec; -}; - -template<typename VectorType, typename Index> class BlockVectorReturn; - - -// Proxy to view a regular vector as a block vector -template<typename BlockSparseMatrixT, typename VectorType> -class BlockVectorReturn -{ - public: - enum { - ColsAtCompileTime = VectorType::ColsAtCompileTime, - RowsAtCompileTime = VectorType::RowsAtCompileTime, - Flags = VectorType::Flags - }; - typedef Ref<Matrix<typename VectorType::Scalar, RowsAtCompileTime, ColsAtCompileTime> > Scalar; - typedef typename BlockSparseMatrixT::Index Index; - public: - BlockVectorReturn(const BlockSparseMatrixT& spblockmat, VectorType& vec) - : m_spblockmat(spblockmat),m_vec(vec) - { } - inline Index size() const - { - return m_spblockmat.blockRows(); - } - inline Scalar coeffRef(Index bi) - { - Index startRow = m_spblockmat.blockRowsIndex(bi); - Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; - return m_vec.middleRows(startRow, rowSize); - } - inline Scalar coeffRef(Index bi, Index j) - { - Index startRow = m_spblockmat.blockRowsIndex(bi); - Index rowSize = m_spblockmat.blockRowsIndex(bi+1) - startRow; - return m_vec.block(startRow, j, rowSize, 1); - } - - protected: - const BlockSparseMatrixT& m_spblockmat; - VectorType& m_vec; -}; - -// Block version of the sparse dense product -template<typename Lhs, typename Rhs> -class BlockSparseTimeDenseProduct; - -namespace internal { - -template<typename BlockSparseMatrixT, typename VecType> -struct traits<BlockSparseTimeDenseProduct<BlockSparseMatrixT, VecType> > -{ - typedef Dense StorageKind; - typedef MatrixXpr 
XprKind; - typedef typename BlockSparseMatrixT::Scalar Scalar; - typedef typename BlockSparseMatrixT::Index Index; - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - Flags = 0, - CoeffReadCost = internal::traits<BlockSparseMatrixT>::CoeffReadCost - }; -}; -} // end namespace internal - -template<typename Lhs, typename Rhs> -class BlockSparseTimeDenseProduct - : public ProductBase<BlockSparseTimeDenseProduct<Lhs,Rhs>, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(BlockSparseTimeDenseProduct) - - BlockSparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template<typename Dest> void scaleAndAddTo(Dest& dest, const typename Rhs::Scalar& alpha) const - { - BlockVectorReturn<Lhs,Dest> tmpDest(m_lhs, dest); - internal::sparse_time_dense_product( BlockSparseMatrixView<Lhs>(m_lhs), BlockVectorView<Lhs, Rhs>(m_lhs, m_rhs), tmpDest, alpha); - } - - private: - BlockSparseTimeDenseProduct& operator=(const BlockSparseTimeDenseProduct&); -}; - -template<typename _Scalar, int _BlockAtCompileTime, int _Options, typename _StorageIndex> -class BlockSparseMatrix : public SparseMatrixBase<BlockSparseMatrix<_Scalar,_BlockAtCompileTime, _Options,_StorageIndex> > -{ - public: - typedef _Scalar Scalar; - typedef typename NumTraits<Scalar>::Real RealScalar; - typedef _StorageIndex StorageIndex; - typedef typename internal::ref_selector<BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, _StorageIndex> >::type Nested; - - enum { - Options = _Options, - Flags = Options, - BlockSize=_BlockAtCompileTime, - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - IsVectorAtCompileTime = 0, - IsColMajor = Flags&RowMajorBit ? 0 : 1 - }; - typedef Matrix<Scalar, _BlockAtCompileTime, _BlockAtCompileTime,IsColMajor ? ColMajor : RowMajor> BlockScalar; - typedef Matrix<RealScalar, _BlockAtCompileTime, _BlockAtCompileTime,IsColMajor ? ColMajor : RowMajor> BlockRealScalar; - typedef typename internal::conditional<_BlockAtCompileTime==Dynamic, Scalar, BlockScalar>::type BlockScalarReturnType; - typedef BlockSparseMatrix<Scalar, BlockSize, IsColMajor ? ColMajor : RowMajor, StorageIndex> PlainObject; - public: - // Default constructor - BlockSparseMatrix() - : m_innerBSize(0),m_outerBSize(0),m_innerOffset(0),m_outerOffset(0), - m_nonzerosblocks(0),m_values(0),m_blockPtr(0),m_indices(0), - m_outerIndex(0),m_blockSize(BlockSize) - { } - - - /** - * \brief Construct and resize - * - */ - BlockSparseMatrix(Index brow, Index bcol) - : m_innerBSize(IsColMajor ? brow : bcol), - m_outerBSize(IsColMajor ? bcol : brow), - m_innerOffset(0),m_outerOffset(0),m_nonzerosblocks(0), - m_values(0),m_blockPtr(0),m_indices(0), - m_outerIndex(0),m_blockSize(BlockSize) - { } - - /** - * \brief Copy-constructor - */ - BlockSparseMatrix(const BlockSparseMatrix& other) - : m_innerBSize(other.m_innerBSize),m_outerBSize(other.m_outerBSize), - m_nonzerosblocks(other.m_nonzerosblocks),m_nonzeros(other.m_nonzeros), - m_blockPtr(0),m_blockSize(other.m_blockSize) - { - // should we allow copying between variable-size blocks and fixed-size blocks ?? 
- eigen_assert(m_blockSize == BlockSize && " CAN NOT COPY BETWEEN FIXED-SIZE AND VARIABLE-SIZE BLOCKS"); - - std::copy(other.m_innerOffset, other.m_innerOffset+m_innerBSize+1, m_innerOffset); - std::copy(other.m_outerOffset, other.m_outerOffset+m_outerBSize+1, m_outerOffset); - std::copy(other.m_values, other.m_values+m_nonzeros, m_values); - - if(m_blockSize != Dynamic) - std::copy(other.m_blockPtr, other.m_blockPtr+m_nonzerosblocks, m_blockPtr); - - std::copy(other.m_indices, other.m_indices+m_nonzerosblocks, m_indices); - std::copy(other.m_outerIndex, other.m_outerIndex+m_outerBSize, m_outerIndex); - } - - friend void swap(BlockSparseMatrix& first, BlockSparseMatrix& second) - { - std::swap(first.m_innerBSize, second.m_innerBSize); - std::swap(first.m_outerBSize, second.m_outerBSize); - std::swap(first.m_innerOffset, second.m_innerOffset); - std::swap(first.m_outerOffset, second.m_outerOffset); - std::swap(first.m_nonzerosblocks, second.m_nonzerosblocks); - std::swap(first.m_nonzeros, second.m_nonzeros); - std::swap(first.m_values, second.m_values); - std::swap(first.m_blockPtr, second.m_blockPtr); - std::swap(first.m_indices, second.m_indices); - std::swap(first.m_outerIndex, second.m_outerIndex); - std::swap(first.m_blockSize, second.m_blockSize); - } - - BlockSparseMatrix& operator=(BlockSparseMatrix other) - { - //Copy-and-swap paradigm ... avoid leaked data if thrown - swap(*this, other); - return *this; - } - - // Destructor - ~BlockSparseMatrix() - { - delete[] m_outerIndex; - delete[] m_innerOffset; - delete[] m_outerOffset; - delete[] m_indices; - delete[] m_blockPtr; - delete[] m_values; - } - - - /** - * \brief Constructor from a sparse matrix - * - */ - template<typename MatrixType> - inline BlockSparseMatrix(const MatrixType& spmat) : m_blockSize(BlockSize) - { - EIGEN_STATIC_ASSERT((m_blockSize != Dynamic), THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE); - - *this = spmat; - } - - /** - * \brief Assignment from a sparse matrix with the same storage order - * - * Convert from a sparse matrix to a block sparse matrix. - * \warning Before calling this function, it is necessary to call - * either setBlockLayout() (matrices with variable-size blocks) - * or setBlockSize() (for fixed-size blocks).
- */ - template<typename MatrixType> - inline BlockSparseMatrix& operator=(const MatrixType& spmat) - { - eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) - && "Trying to assign to a zero-size matrix, call resize() first"); - eigen_assert(((MatrixType::Options&RowMajorBit) != IsColMajor) && "Wrong storage order"); - typedef SparseMatrix<bool,MatrixType::Options,typename MatrixType::Index> MatrixPatternType; - MatrixPatternType blockPattern(blockRows(), blockCols()); - m_nonzeros = 0; - - // First, compute the number of nonzero blocks and their locations - for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) - { - // Browse each outer block and compute the structure - std::vector<bool> nzblocksFlag(m_innerBSize,false); // Record the existing blocks - blockPattern.startVec(bj); - for(StorageIndex j = blockOuterIndex(bj); j < blockOuterIndex(bj+1); ++j) - { - typename MatrixType::InnerIterator it_spmat(spmat, j); - for(; it_spmat; ++it_spmat) - { - StorageIndex bi = innerToBlock(it_spmat.index()); // Index of the current nonzero block - if(!nzblocksFlag[bi]) - { - // Save the index of this nonzero block - nzblocksFlag[bi] = true; - blockPattern.insertBackByOuterInnerUnordered(bj, bi) = true; - // Compute the total number of nonzeros (including explicit zeros in blocks) - m_nonzeros += blockOuterSize(bj) * blockInnerSize(bi); - } - } - } // end current outer block - } - blockPattern.finalize(); - - // Allocate the internal arrays - setBlockStructure(blockPattern); - - for(StorageIndex nz = 0; nz < m_nonzeros; ++nz) m_values[nz] = Scalar(0); - for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) - { - // Now copy the values - for(StorageIndex j = blockOuterIndex(bj); j < blockOuterIndex(bj+1); ++j) - { - // Browse the outer block column by column (for column-major matrices) - typename MatrixType::InnerIterator it_spmat(spmat, j); - for(; it_spmat; ++it_spmat) - { - StorageIndex idx = 0; // Position of this block in the column block - StorageIndex bi = innerToBlock(it_spmat.index()); // Index of the current nonzero block - // Go to the inner block where this element belongs to - while(bi > m_indices[m_outerIndex[bj]+idx]) ++idx; // Not expensive for ordered blocks - StorageIndex idxVal;// Get the right position in the array of values for this element - if(m_blockSize == Dynamic) - { - // Offset from all blocks before ... - idxVal = m_blockPtr[m_outerIndex[bj]+idx]; - // ... and offset inside the block - idxVal += (j - blockOuterIndex(bj)) * blockOuterSize(bj) + it_spmat.index() - m_innerOffset[bi]; - } - else - { - // All blocks before - idxVal = (m_outerIndex[bj] + idx) * m_blockSize * m_blockSize; - // inside the block - idxVal += (j - blockOuterIndex(bj)) * m_blockSize + (it_spmat.index()%m_blockSize); - } - // Insert the value - m_values[idxVal] = it_spmat.value(); - } // end of this column - } // end of this block - } // end of this outer block - - return *this; - } - - /** - * \brief Set the nonzero block pattern of the matrix - * - * Given a sparse matrix describing the nonzero block pattern, - * this function prepares the internal pointers for values. - * After calling this function, any *nonzero* block (bi, bj) can be set - * with a simple call to coeffRef(bi,bj). - * - * - * \warning Before calling this function, tt is necessary to call - * either setBlockLayout() (matrices with variable-size blocks) - * or setBlockSize() (for fixed-size blocks). 
- * - * \param blockPattern Sparse matrix of boolean elements describing the block structure - * - * \sa setBlockLayout() \sa setBlockSize() - */ - template<typename MatrixType> - void setBlockStructure(const MatrixType& blockPattern) - { - resize(blockPattern.rows(), blockPattern.cols()); - reserve(blockPattern.nonZeros()); - - // Browse the block pattern and set up the various pointers - m_outerIndex[0] = 0; - if(m_blockSize == Dynamic) m_blockPtr[0] = 0; - for(StorageIndex nz = 0; nz < m_nonzeros; ++nz) m_values[nz] = Scalar(0); - for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) - { - //Browse each outer block - - //First, copy and save the indices of nonzero blocks - //FIXME : find a way to avoid this ... - std::vector<int> nzBlockIdx; - typename MatrixType::InnerIterator it(blockPattern, bj); - for(; it; ++it) - { - nzBlockIdx.push_back(it.index()); - } - std::sort(nzBlockIdx.begin(), nzBlockIdx.end()); - - // Now, fill block indices and (eventually) pointers to blocks - for(StorageIndex idx = 0; idx < nzBlockIdx.size(); ++idx) - { - StorageIndex offset = m_outerIndex[bj]+idx; // offset in m_indices - m_indices[offset] = nzBlockIdx[idx]; - if(m_blockSize == Dynamic) - m_blockPtr[offset] = m_blockPtr[offset-1] + blockInnerSize(nzBlockIdx[idx]) * blockOuterSize(bj); - // There is no blockPtr for fixed-size blocks... not needed !??? - } - // Save the pointer to the next outer block - m_outerIndex[bj+1] = m_outerIndex[bj] + nzBlockIdx.size(); - } - } - - /** - * \brief Set the number of rows and columns blocks - */ - inline void resize(Index brow, Index bcol) - { - m_innerBSize = IsColMajor ? brow : bcol; - m_outerBSize = IsColMajor ? bcol : brow; - } - - /** - * \brief set the block size at runtime for fixed-size block layout - * - * Call this only for fixed-size blocks - */ - inline void setBlockSize(Index blockSize) - { - m_blockSize = blockSize; - } - - /** - * \brief Set the row and column block layouts, - * - * This function set the size of each row and column block. - * So this function should be used only for blocks with variable size. - * \param rowBlocks : Number of rows per row block - * \param colBlocks : Number of columns per column block - * \sa resize(), setBlockSize() - */ - inline void setBlockLayout(const VectorXi& rowBlocks, const VectorXi& colBlocks) - { - const VectorXi& innerBlocks = IsColMajor ? rowBlocks : colBlocks; - const VectorXi& outerBlocks = IsColMajor ? colBlocks : rowBlocks; - eigen_assert(m_innerBSize == innerBlocks.size() && "CHECK THE NUMBER OF ROW OR COLUMN BLOCKS"); - eigen_assert(m_outerBSize == outerBlocks.size() && "CHECK THE NUMBER OF ROW OR COLUMN BLOCKS"); - m_outerBSize = outerBlocks.size(); - // starting index of blocks... cumulative sums - m_innerOffset = new StorageIndex[m_innerBSize+1]; - m_outerOffset = new StorageIndex[m_outerBSize+1]; - m_innerOffset[0] = 0; - m_outerOffset[0] = 0; - std::partial_sum(&innerBlocks[0], &innerBlocks[m_innerBSize-1]+1, &m_innerOffset[1]); - std::partial_sum(&outerBlocks[0], &outerBlocks[m_outerBSize-1]+1, &m_outerOffset[1]); - - // Compute the total number of nonzeros - m_nonzeros = 0; - for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) - for(StorageIndex bi = 0; bi < m_innerBSize; ++bi) - m_nonzeros += outerBlocks[bj] * innerBlocks[bi]; - - } - - /** - * \brief Allocate the internal array of pointers to blocks and their inner indices - * - * \note For fixed-size blocks, call setBlockSize() to set the block. 
- * And For variable-size blocks, call setBlockLayout() before using this function - * - * \param nonzerosblocks Number of nonzero blocks. The total number of nonzeros is - * is computed in setBlockLayout() for variable-size blocks - * \sa setBlockSize() - */ - inline void reserve(const Index nonzerosblocks) - { - eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) && - "TRYING TO RESERVE ZERO-SIZE MATRICES, CALL resize() first"); - - //FIXME Should free if already allocated - m_outerIndex = new StorageIndex[m_outerBSize+1]; - - m_nonzerosblocks = nonzerosblocks; - if(m_blockSize != Dynamic) - { - m_nonzeros = nonzerosblocks * (m_blockSize * m_blockSize); - m_blockPtr = 0; - } - else - { - // m_nonzeros is already computed in setBlockLayout() - m_blockPtr = new StorageIndex[m_nonzerosblocks+1]; - } - m_indices = new StorageIndex[m_nonzerosblocks+1]; - m_values = new Scalar[m_nonzeros]; - } - - - /** - * \brief Fill values in a matrix from a triplet list. - * - * Each triplet item has a block stored in an Eigen dense matrix. - * The InputIterator class should provide the functions row(), col() and value() - * - * \note For fixed-size blocks, call setBlockSize() before this function. - * - * FIXME Do not accept duplicates - */ - template<typename InputIterator> - void setFromTriplets(const InputIterator& begin, const InputIterator& end) - { - eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before"); - - /* First, sort the triplet list - * FIXME This can be unnecessarily expensive since only the inner indices have to be sorted - * The best approach is like in SparseMatrix::setFromTriplets() - */ - internal::TripletComp<InputIterator, IsColMajor> tripletcomp; - std::sort(begin, end, tripletcomp); - - /* Count the number of rows and column blocks, - * and the number of nonzero blocks per outer dimension - */ - VectorXi rowBlocks(m_innerBSize); // Size of each block row - VectorXi colBlocks(m_outerBSize); // Size of each block column - rowBlocks.setZero(); colBlocks.setZero(); - VectorXi nzblock_outer(m_outerBSize); // Number of nz blocks per outer vector - VectorXi nz_outer(m_outerBSize); // Number of nz per outer vector...for variable-size blocks - nzblock_outer.setZero(); - nz_outer.setZero(); - for(InputIterator it(begin); it !=end; ++it) - { - eigen_assert(it->row() >= 0 && it->row() < this->blockRows() && it->col() >= 0 && it->col() < this->blockCols()); - eigen_assert((it->value().rows() == it->value().cols() && (it->value().rows() == m_blockSize)) - || (m_blockSize == Dynamic)); - - if(m_blockSize == Dynamic) - { - eigen_assert((rowBlocks[it->row()] == 0 || rowBlocks[it->row()] == it->value().rows()) && - "NON CORRESPONDING SIZES FOR ROW BLOCKS"); - eigen_assert((colBlocks[it->col()] == 0 || colBlocks[it->col()] == it->value().cols()) && - "NON CORRESPONDING SIZES FOR COLUMN BLOCKS"); - rowBlocks[it->row()] =it->value().rows(); - colBlocks[it->col()] = it->value().cols(); - } - nz_outer(IsColMajor ? it->col() : it->row()) += it->value().rows() * it->value().cols(); - nzblock_outer(IsColMajor ? 
it->col() : it->row())++; - } - // Allocate member arrays - if(m_blockSize == Dynamic) setBlockLayout(rowBlocks, colBlocks); - StorageIndex nzblocks = nzblock_outer.sum(); - reserve(nzblocks); - - // Temporary markers - VectorXi block_id(m_outerBSize); // To be used as a block marker during insertion - - // Setup outer index pointers and markers - m_outerIndex[0] = 0; - if (m_blockSize == Dynamic) m_blockPtr[0] = 0; - for(StorageIndex bj = 0; bj < m_outerBSize; ++bj) - { - m_outerIndex[bj+1] = m_outerIndex[bj] + nzblock_outer(bj); - block_id(bj) = m_outerIndex[bj]; - if(m_blockSize==Dynamic) - { - m_blockPtr[m_outerIndex[bj+1]] = m_blockPtr[m_outerIndex[bj]] + nz_outer(bj); - } - } - - // Fill the matrix - for(InputIterator it(begin); it!=end; ++it) - { - StorageIndex outer = IsColMajor ? it->col() : it->row(); - StorageIndex inner = IsColMajor ? it->row() : it->col(); - m_indices[block_id(outer)] = inner; - StorageIndex block_size = it->value().rows()*it->value().cols(); - StorageIndex nz_marker = blockPtr(block_id[outer]); - memcpy(&(m_values[nz_marker]), it->value().data(), block_size * sizeof(Scalar)); - if(m_blockSize == Dynamic) - { - m_blockPtr[block_id(outer)+1] = m_blockPtr[block_id(outer)] + block_size; - } - block_id(outer)++; - } - - // An alternative when the outer indices are sorted...no need to use an array of markers -// for(Index bcol = 0; bcol < m_outerBSize; ++bcol) -// { -// Index id = 0, id_nz = 0, id_nzblock = 0; -// for(InputIterator it(begin); it!=end; ++it) -// { -// while (id<bcol) // one pass should do the job unless there are empty columns -// { -// id++; -// m_outerIndex[id+1]=m_outerIndex[id]; -// } -// m_outerIndex[id+1] += 1; -// m_indices[id_nzblock]=brow; -// Index block_size = it->value().rows()*it->value().cols(); -// m_blockPtr[id_nzblock+1] = m_blockPtr[id_nzblock] + block_size; -// id_nzblock++; -// memcpy(&(m_values[id_nz]),it->value().data(), block_size*sizeof(Scalar)); -// id_nz += block_size; -// } -// while(id < m_outerBSize-1) // Empty columns at the end -// { -// id++; -// m_outerIndex[id+1]=m_outerIndex[id]; -// } -// } - } - - - /** - * \returns the number of rows - */ - inline Index rows() const - { -// return blockRows(); - return (IsColMajor ? innerSize() : outerSize()); - } - - /** - * \returns the number of cols - */ - inline Index cols() const - { -// return blockCols(); - return (IsColMajor ? outerSize() : innerSize()); - } - - inline Index innerSize() const - { - if(m_blockSize == Dynamic) return m_innerOffset[m_innerBSize]; - else return (m_innerBSize * m_blockSize) ; - } - - inline Index outerSize() const - { - if(m_blockSize == Dynamic) return m_outerOffset[m_outerBSize]; - else return (m_outerBSize * m_blockSize) ; - } - /** \returns the number of rows grouped by blocks */ - inline Index blockRows() const - { - return (IsColMajor ? m_innerBSize : m_outerBSize); - } - /** \returns the number of columns grouped by blocks */ - inline Index blockCols() const - { - return (IsColMajor ? 
m_outerBSize : m_innerBSize); - } - - inline Index outerBlocks() const { return m_outerBSize; } - inline Index innerBlocks() const { return m_innerBSize; } - - /** \returns the block index where outer belongs to */ - inline Index outerToBlock(Index outer) const - { - eigen_assert(outer < outerSize() && "OUTER INDEX OUT OF BOUNDS"); - - if(m_blockSize != Dynamic) - return (outer / m_blockSize); // Integer division - - StorageIndex b_outer = 0; - while(m_outerOffset[b_outer] <= outer) ++b_outer; - return b_outer - 1; - } - /** \returns the block index where inner belongs to */ - inline Index innerToBlock(Index inner) const - { - eigen_assert(inner < innerSize() && "OUTER INDEX OUT OF BOUNDS"); - - if(m_blockSize != Dynamic) - return (inner / m_blockSize); // Integer division - - StorageIndex b_inner = 0; - while(m_innerOffset[b_inner] <= inner) ++b_inner; - return b_inner - 1; - } - - /** - *\returns a reference to the (i,j) block as an Eigen Dense Matrix - */ - Ref<BlockScalar> coeffRef(Index brow, Index bcol) - { - eigen_assert(brow < blockRows() && "BLOCK ROW INDEX OUT OF BOUNDS"); - eigen_assert(bcol < blockCols() && "BLOCK nzblocksFlagCOLUMN OUT OF BOUNDS"); - - StorageIndex rsize = IsColMajor ? blockInnerSize(brow): blockOuterSize(bcol); - StorageIndex csize = IsColMajor ? blockOuterSize(bcol) : blockInnerSize(brow); - StorageIndex inner = IsColMajor ? brow : bcol; - StorageIndex outer = IsColMajor ? bcol : brow; - StorageIndex offset = m_outerIndex[outer]; - while(offset < m_outerIndex[outer+1] && m_indices[offset] != inner) - offset++; - if(m_indices[offset] == inner) - { - return Map<BlockScalar>(&(m_values[blockPtr(offset)]), rsize, csize); - } - else - { - //FIXME the block does not exist, Insert it !!!!!!!!! - eigen_assert("DYNAMIC INSERTION IS NOT YET SUPPORTED"); - } - } - - /** - * \returns the value of the (i,j) block as an Eigen Dense Matrix - */ - Map<const BlockScalar> coeff(Index brow, Index bcol) const - { - eigen_assert(brow < blockRows() && "BLOCK ROW INDEX OUT OF BOUNDS"); - eigen_assert(bcol < blockCols() && "BLOCK COLUMN OUT OF BOUNDS"); - - StorageIndex rsize = IsColMajor ? blockInnerSize(brow): blockOuterSize(bcol); - StorageIndex csize = IsColMajor ? blockOuterSize(bcol) : blockInnerSize(brow); - StorageIndex inner = IsColMajor ? brow : bcol; - StorageIndex outer = IsColMajor ? 
bcol : brow; - StorageIndex offset = m_outerIndex[outer]; - while(offset < m_outerIndex[outer+1] && m_indices[offset] != inner) offset++; - if(m_indices[offset] == inner) - { - return Map<const BlockScalar> (&(m_values[blockPtr(offset)]), rsize, csize); - } - else -// return BlockScalar::Zero(rsize, csize); - eigen_assert("NOT YET SUPPORTED"); - } - - // Block Matrix times vector product - template<typename VecType> - BlockSparseTimeDenseProduct<BlockSparseMatrix, VecType> operator*(const VecType& lhs) const - { - return BlockSparseTimeDenseProduct<BlockSparseMatrix, VecType>(*this, lhs); - } - - /** \returns the number of nonzero blocks */ - inline Index nonZerosBlocks() const { return m_nonzerosblocks; } - /** \returns the total number of nonzero elements, including eventual explicit zeros in blocks */ - inline Index nonZeros() const { return m_nonzeros; } - - inline BlockScalarReturnType *valuePtr() {return static_cast<BlockScalarReturnType *>(m_values);} -// inline Scalar *valuePtr(){ return m_values; } - inline StorageIndex *innerIndexPtr() {return m_indices; } - inline const StorageIndex *innerIndexPtr() const {return m_indices; } - inline StorageIndex *outerIndexPtr() {return m_outerIndex; } - inline const StorageIndex* outerIndexPtr() const {return m_outerIndex; } - - /** \brief for compatibility purposes with the SparseMatrix class */ - inline bool isCompressed() const {return true;} - /** - * \returns the starting index of the bi row block - */ - inline Index blockRowsIndex(Index bi) const - { - return IsColMajor ? blockInnerIndex(bi) : blockOuterIndex(bi); - } - - /** - * \returns the starting index of the bj col block - */ - inline Index blockColsIndex(Index bj) const - { - return IsColMajor ? blockOuterIndex(bj) : blockInnerIndex(bj); - } - - inline Index blockOuterIndex(Index bj) const - { - return (m_blockSize == Dynamic) ? m_outerOffset[bj] : (bj * m_blockSize); - } - inline Index blockInnerIndex(Index bi) const - { - return (m_blockSize == Dynamic) ? m_innerOffset[bi] : (bi * m_blockSize); - } - - // Not needed ??? - inline Index blockInnerSize(Index bi) const - { - return (m_blockSize == Dynamic) ? (m_innerOffset[bi+1] - m_innerOffset[bi]) : m_blockSize; - } - inline Index blockOuterSize(Index bj) const - { - return (m_blockSize == Dynamic) ? 
(m_outerOffset[bj+1]- m_outerOffset[bj]) : m_blockSize; - } - - /** - * \brief Browse the matrix by outer index - */ - class InnerIterator; // Browse column by column - - /** - * \brief Browse the matrix by block outer index - */ - class BlockInnerIterator; // Browse block by block - - friend std::ostream & operator << (std::ostream & s, const BlockSparseMatrix& m) - { - for (StorageIndex j = 0; j < m.outerBlocks(); ++j) - { - BlockInnerIterator itb(m, j); - for(; itb; ++itb) - { - s << "("<<itb.row() << ", " << itb.col() << ")\n"; - s << itb.value() <<"\n"; - } - } - s << std::endl; - return s; - } - - /** - * \returns the starting position of the block \p id in the array of values - */ - Index blockPtr(Index id) const - { - if(m_blockSize == Dynamic) return m_blockPtr[id]; - else return id * m_blockSize * m_blockSize; - //return blockDynIdx(id, typename internal::conditional<(BlockSize==Dynamic), internal::true_type, internal::false_type>::type()); - } - - - protected: -// inline Index blockDynIdx(Index id, internal::true_type) const -// { -// return m_blockPtr[id]; -// } -// inline Index blockDynIdx(Index id, internal::false_type) const -// { -// return id * BlockSize * BlockSize; -// } - - // To be implemented - // Insert a block at a particular location... need to make a room for that - Map<BlockScalar> insert(Index brow, Index bcol); - - Index m_innerBSize; // Number of block rows - Index m_outerBSize; // Number of block columns - StorageIndex *m_innerOffset; // Starting index of each inner block (size m_innerBSize+1) - StorageIndex *m_outerOffset; // Starting index of each outer block (size m_outerBSize+1) - Index m_nonzerosblocks; // Total nonzeros blocks (lower than m_innerBSize x m_outerBSize) - Index m_nonzeros; // Total nonzeros elements - Scalar *m_values; //Values stored block column after block column (size m_nonzeros) - StorageIndex *m_blockPtr; // Pointer to the beginning of each block in m_values, size m_nonzeroblocks ... null for fixed-size blocks - StorageIndex *m_indices; //Inner block indices, size m_nonzerosblocks ... OK - StorageIndex *m_outerIndex; // Starting pointer of each block column in m_indices (size m_outerBSize)... OK - Index m_blockSize; // Size of a block for fixed-size blocks, otherwise -1 -}; - -template<typename _Scalar, int _BlockAtCompileTime, int _Options, typename _StorageIndex> -class BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, _StorageIndex>::BlockInnerIterator -{ - public: - - enum{ - Flags = _Options - }; - - BlockInnerIterator(const BlockSparseMatrix& mat, const Index outer) - : m_mat(mat),m_outer(outer), - m_id(mat.m_outerIndex[outer]), - m_end(mat.m_outerIndex[outer+1]) - { - } - - inline BlockInnerIterator& operator++() {m_id++; return *this; } - - inline const Map<const BlockScalar> value() const - { - return Map<const BlockScalar>(&(m_mat.m_values[m_mat.blockPtr(m_id)]), - rows(),cols()); - } - inline Map<BlockScalar> valueRef() - { - return Map<BlockScalar>(&(m_mat.m_values[m_mat.blockPtr(m_id)]), - rows(),cols()); - } - // Block inner index - inline Index index() const {return m_mat.m_indices[m_id]; } - inline Index outer() const { return m_outer; } - // block row index - inline Index row() const {return index(); } - // block column index - inline Index col() const {return outer(); } - // FIXME Number of rows in the current block - inline Index rows() const { return (m_mat.m_blockSize==Dynamic) ? 
(m_mat.m_innerOffset[index()+1] - m_mat.m_innerOffset[index()]) : m_mat.m_blockSize; } - // Number of columns in the current block ... - inline Index cols() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_outerOffset[m_outer+1]-m_mat.m_outerOffset[m_outer]) : m_mat.m_blockSize;} - inline operator bool() const { return (m_id < m_end); } - - protected: - const BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, StorageIndex>& m_mat; - const Index m_outer; - Index m_id; - Index m_end; -}; - -template<typename _Scalar, int _BlockAtCompileTime, int _Options, typename _StorageIndex> -class BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, _StorageIndex>::InnerIterator -{ - public: - InnerIterator(const BlockSparseMatrix& mat, Index outer) - : m_mat(mat),m_outerB(mat.outerToBlock(outer)),m_outer(outer), - itb(mat, mat.outerToBlock(outer)), - m_offset(outer - mat.blockOuterIndex(m_outerB)) - { - if (itb) - { - m_id = m_mat.blockInnerIndex(itb.index()); - m_start = m_id; - m_end = m_mat.blockInnerIndex(itb.index()+1); - } - } - inline InnerIterator& operator++() - { - m_id++; - if (m_id >= m_end) - { - ++itb; - if (itb) - { - m_id = m_mat.blockInnerIndex(itb.index()); - m_start = m_id; - m_end = m_mat.blockInnerIndex(itb.index()+1); - } - } - return *this; - } - inline const Scalar& value() const - { - return itb.value().coeff(m_id - m_start, m_offset); - } - inline Scalar& valueRef() - { - return itb.valueRef().coeff(m_id - m_start, m_offset); - } - inline Index index() const { return m_id; } - inline Index outer() const {return m_outer; } - inline Index col() const {return outer(); } - inline Index row() const { return index();} - inline operator bool() const - { - return itb; - } - protected: - const BlockSparseMatrix& m_mat; - const Index m_outer; - const Index m_outerB; - BlockInnerIterator itb; // Iterator through the blocks - const Index m_offset; // Position of this column in the block - Index m_start; // starting inner index of this block - Index m_id; // current inner index in the block - Index m_end; // starting inner index of the next block - -}; -} // end namespace Eigen - -#endif // EIGEN_SPARSEBLOCKMATRIX_H diff --git a/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h deleted file mode 100644 index 0ffbc43..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h +++ /dev/null @@ -1,404 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_DYNAMIC_SPARSEMATRIX_H -#define EIGEN_DYNAMIC_SPARSEMATRIX_H - -namespace Eigen { - -/** \deprecated use a SparseMatrix in an uncompressed mode - * - * \class DynamicSparseMatrix - * - * \brief A sparse matrix class designed for matrix assembly purpose - * - * \param _Scalar the scalar type, i.e. the type of the coefficients - * - * Unlike SparseMatrix, this class provides a much higher degree of flexibility. In particular, it allows - * random read/write accesses in log(rho*outer_size) where \c rho is the probability that a coefficient is - * nonzero and outer_size is the number of columns if the matrix is column-major and the number of rows - * otherwise. 
- * - * Internally, the data are stored as a std::vector of compressed vectors. The performance of random writes may - * decrease as the number of nonzeros per inner-vector increases. In practice, we observed very good performance - * up to about 100 nonzeros/vector, and the performance remains relatively good up to 500 nonzeros/vector. - * - * \see SparseMatrix - */ - -namespace internal { -template<typename _Scalar, int _Options, typename _StorageIndex> -struct traits<DynamicSparseMatrix<_Scalar, _Options, _StorageIndex> > -{ - typedef _Scalar Scalar; - typedef _StorageIndex StorageIndex; - typedef Sparse StorageKind; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - Flags = _Options | NestByRefBit | LvalueBit, - CoeffReadCost = NumTraits<Scalar>::ReadCost, - SupportedAccessPatterns = OuterRandomAccessPattern - }; -}; -} - -template<typename _Scalar, int _Options, typename _StorageIndex> - class DynamicSparseMatrix - : public SparseMatrixBase<DynamicSparseMatrix<_Scalar, _Options, _StorageIndex> > -{ - typedef SparseMatrixBase<DynamicSparseMatrix> Base; - using Base::convert_index; - public: - EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix) - // FIXME: why are these operators already available ??? - // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, +=) - // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, -=) - typedef MappedSparseMatrix<Scalar,Flags> Map; - using Base::IsRowMajor; - using Base::operator=; - enum { - Options = _Options - }; - - protected: - - typedef DynamicSparseMatrix<Scalar,(Flags&~RowMajorBit)|(IsRowMajor?RowMajorBit:0), StorageIndex> TransposedSparseMatrix; - - Index m_innerSize; - std::vector<internal::CompressedStorage<Scalar,StorageIndex> > m_data; - - public: - - inline Index rows() const { return IsRowMajor ? outerSize() : m_innerSize; } - inline Index cols() const { return IsRowMajor ? m_innerSize : outerSize(); } - inline Index innerSize() const { return m_innerSize; } - inline Index outerSize() const { return convert_index(m_data.size()); } - inline Index innerNonZeros(Index j) const { return m_data[j].size(); } - - std::vector<internal::CompressedStorage<Scalar,StorageIndex> >& _data() { return m_data; } - const std::vector<internal::CompressedStorage<Scalar,StorageIndex> >& _data() const { return m_data; } - - /** \returns the coefficient value at given position \a row, \a col - * This operation involves a log(rho*outer_size) binary search. - */ - inline Scalar coeff(Index row, Index col) const - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - return m_data[outer].at(inner); - } - - /** \returns a reference to the coefficient value at given position \a row, \a col - * This operation involves a log(rho*outer_size) binary search. If the coefficient does not - * exist yet, then a sorted insertion into a sequential buffer is performed. - */ - inline Scalar& coeffRef(Index row, Index col) - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - return m_data[outer].atWithInsertion(inner); - } - - class InnerIterator; - class ReverseInnerIterator; - - void setZero() - { - for (Index j=0; j<outerSize(); ++j) - m_data[j].clear(); - } - - /** \returns the number of non zero coefficients */ - Index nonZeros() const - { - Index res = 0; - for (Index j=0; j<outerSize(); ++j) - res += m_data[j].size(); - return res; - } - - - - void reserve(Index reserveSize = 1000) - { - if (outerSize()>0) - { - Index reserveSizePerVector = (std::max)(reserveSize/outerSize(),Index(4)); - for (Index j=0; j<outerSize(); ++j) - { - m_data[j].reserve(reserveSizePerVector); - } - } - } - - /** Does nothing: provided for compatibility with SparseMatrix */ - inline void startVec(Index /*outer*/) {} - - /** \returns a reference to the non zero coefficient at position \a row, \a col assuming that: - * - the nonzero does not already exist - * - the new coefficient is the last one of the given inner vector. - * - * \sa insert, insertBackByOuterInner */ - inline Scalar& insertBack(Index row, Index col) - { - return insertBackByOuterInner(IsRowMajor?row:col, IsRowMajor?col:row); - } - - /** \sa insertBack */ - inline Scalar& insertBackByOuterInner(Index outer, Index inner) - { - eigen_assert(outer<Index(m_data.size()) && inner<m_innerSize && "out of range"); - eigen_assert(((m_data[outer].size()==0) || (m_data[outer].index(m_data[outer].size()-1)<inner)) - && "wrong sorted insertion"); - m_data[outer].append(0, inner); - return m_data[outer].value(m_data[outer].size()-1); - } - - inline Scalar& insert(Index row, Index col) - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - Index startId = 0; - Index id = static_cast<Index>(m_data[outer].size()) - 1; - m_data[outer].resize(id+2,1); - - while ( (id >= startId) && (m_data[outer].index(id) > inner) ) - { - m_data[outer].index(id+1) = m_data[outer].index(id); - m_data[outer].value(id+1) = m_data[outer].value(id); - --id; - } - m_data[outer].index(id+1) = inner; - m_data[outer].value(id+1) = 0; - return m_data[outer].value(id+1); - } - - /** Does nothing: provided for compatibility with SparseMatrix */ - inline void finalize() {} - - /** Suppress all nonzeros which are smaller than \a reference under the tolerence \a epsilon */ - void prune(Scalar reference, RealScalar epsilon = NumTraits<RealScalar>::dummy_precision()) - { - for (Index j=0; j<outerSize(); ++j) - m_data[j].prune(reference,epsilon); - } - - /** Resize the matrix without preserving the data (the matrix is set to zero) - */ - void resize(Index rows, Index cols) - { - const Index outerSize = IsRowMajor ? rows : cols; - m_innerSize = convert_index(IsRowMajor ? cols : rows); - setZero(); - if (Index(m_data.size()) != outerSize) - { - m_data.resize(outerSize); - } - } - - void resizeAndKeepData(Index rows, Index cols) - { - const Index outerSize = IsRowMajor ? rows : cols; - const Index innerSize = IsRowMajor ? 
cols : rows; - if (m_innerSize>innerSize) - { - // remove all coefficients with innerCoord>=innerSize - // TODO - //std::cerr << "not implemented yet\n"; - exit(2); - } - if (m_data.size() != outerSize) - { - m_data.resize(outerSize); - } - } - - /** The class DynamicSparseMatrix is deprecated */ - EIGEN_DEPRECATED inline DynamicSparseMatrix() - : m_innerSize(0), m_data(0) - { - #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - #endif - eigen_assert(innerSize()==0 && outerSize()==0); - } - - /** The class DynamicSparseMatrix is deprecated */ - EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols) - : m_innerSize(0) - { - #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - #endif - resize(rows, cols); - } - - /** The class DynamicSparseMatrix is deprecated */ - template<typename OtherDerived> - EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other) - : m_innerSize(0) - { - #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - #endif - Base::operator=(other.derived()); - } - - inline DynamicSparseMatrix(const DynamicSparseMatrix& other) - : Base(), m_innerSize(0) - { - #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN - #endif - *this = other.derived(); - } - - inline void swap(DynamicSparseMatrix& other) - { - //EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: swap\n"); - std::swap(m_innerSize, other.m_innerSize); - //std::swap(m_outerSize, other.m_outerSize); - m_data.swap(other.m_data); - } - - inline DynamicSparseMatrix& operator=(const DynamicSparseMatrix& other) - { - if (other.isRValue()) - { - swap(other.const_cast_derived()); - } - else - { - resize(other.rows(), other.cols()); - m_data = other.m_data; - } - return *this; - } - - /** Destructor */ - inline ~DynamicSparseMatrix() {} - - public: - - /** \deprecated - * Set the matrix to zero and reserve the memory for \a reserveSize nonzero coefficients. */ - EIGEN_DEPRECATED void startFill(Index reserveSize = 1000) - { - setZero(); - reserve(reserveSize); - } - - /** \deprecated use insert() - * inserts a nonzero coefficient at given coordinates \a row, \a col and returns its reference assuming that: - * 1 - the coefficient does not exist yet - * 2 - this is the coefficient with the greatest inner coordinate for the given outer coordinate. - * In other words, assuming \c *this is column-major, then there must not exist any nonzero coefficient of coordinates - * \c i \c x \a col such that \c i >= \a row. Otherwise the matrix is invalid. - * - * \see fillrand(), coeffRef() - */ - EIGEN_DEPRECATED Scalar& fill(Index row, Index col) - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - return insertBack(outer,inner); - } - - /** \deprecated use insert() - * Like fill() but with random inner coordinates. - * Compared to the generic coeffRef(), the unique limitation is that we assume - * the coefficient does not exist yet. - */ - EIGEN_DEPRECATED Scalar& fillrand(Index row, Index col) - { - return insert(row,col); - } - - /** \deprecated use finalize() - * Does nothing. Provided for compatibility with SparseMatrix. 
*/ - EIGEN_DEPRECATED void endFill() {} - -# ifdef EIGEN_DYNAMICSPARSEMATRIX_PLUGIN -# include EIGEN_DYNAMICSPARSEMATRIX_PLUGIN -# endif - }; - -template<typename Scalar, int _Options, typename _StorageIndex> -class DynamicSparseMatrix<Scalar,_Options,_StorageIndex>::InnerIterator : public SparseVector<Scalar,_Options,_StorageIndex>::InnerIterator -{ - typedef typename SparseVector<Scalar,_Options,_StorageIndex>::InnerIterator Base; - public: - InnerIterator(const DynamicSparseMatrix& mat, Index outer) - : Base(mat.m_data[outer]), m_outer(outer) - {} - - inline Index row() const { return IsRowMajor ? m_outer : Base::index(); } - inline Index col() const { return IsRowMajor ? Base::index() : m_outer; } - inline Index outer() const { return m_outer; } - - protected: - const Index m_outer; -}; - -template<typename Scalar, int _Options, typename _StorageIndex> -class DynamicSparseMatrix<Scalar,_Options,_StorageIndex>::ReverseInnerIterator : public SparseVector<Scalar,_Options,_StorageIndex>::ReverseInnerIterator -{ - typedef typename SparseVector<Scalar,_Options,_StorageIndex>::ReverseInnerIterator Base; - public: - ReverseInnerIterator(const DynamicSparseMatrix& mat, Index outer) - : Base(mat.m_data[outer]), m_outer(outer) - {} - - inline Index row() const { return IsRowMajor ? m_outer : Base::index(); } - inline Index col() const { return IsRowMajor ? Base::index() : m_outer; } - inline Index outer() const { return m_outer; } - - protected: - const Index m_outer; -}; - -namespace internal { - -template<typename _Scalar, int _Options, typename _StorageIndex> -struct evaluator<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> > - : evaluator_base<DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> > -{ - typedef _Scalar Scalar; - typedef DynamicSparseMatrix<_Scalar,_Options,_StorageIndex> SparseMatrixType; - typedef typename SparseMatrixType::InnerIterator InnerIterator; - typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator; - - enum { - CoeffReadCost = NumTraits<_Scalar>::ReadCost, - Flags = SparseMatrixType::Flags - }; - - evaluator() : m_matrix(0) {} - evaluator(const SparseMatrixType &mat) : m_matrix(&mat) {} - - operator SparseMatrixType&() { return m_matrix->const_cast_derived(); } - operator const SparseMatrixType&() const { return *m_matrix; } - - Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); } - - Index nonZerosEstimate() const { return m_matrix->nonZeros(); } - - const SparseMatrixType *m_matrix; -}; - -} - -} // end namespace Eigen - -#endif // EIGEN_DYNAMIC_SPARSEMATRIX_H diff --git a/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h b/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h deleted file mode 100644 index 04b7d69..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h +++ /dev/null @@ -1,275 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud <gael.guennebaud@inria.fr> -// Copyright (C) 2012 Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
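// [Editor's note] A minimal usage sketch for the deprecated DynamicSparseMatrix class listed
// above; it is not part of either removed header. It assumes the usual
// <unsupported/Eigen/SparseExtra> module header is still available, and the matrix size and
// fill pattern below are made up purely for illustration.
#include <unsupported/Eigen/SparseExtra>
#include <Eigen/Sparse>

inline void dynamicSparseAssemblySketch()
{
  Eigen::DynamicSparseMatrix<double> m(1000, 1000);
  m.reserve(4000);                               // roughly 4 nonzeros per inner vector
  for (int k = 0; k < 1000; ++k)
    m.coeffRef(k, (7 * k) % 1000) += 1.0;        // random-access writes, in any order
  Eigen::SparseMatrix<double> compressed(m);     // copy into the compressed format once assembled
}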
- -#ifndef EIGEN_SPARSE_MARKET_IO_H -#define EIGEN_SPARSE_MARKET_IO_H - -#include <iostream> - -namespace Eigen { - -namespace internal -{ - template <typename Scalar,typename IndexType> - inline bool GetMarketLine (std::stringstream& line, IndexType& M, IndexType& N, IndexType& i, IndexType& j, Scalar& value) - { - line >> i >> j >> value; - i--; - j--; - if(i>=0 && j>=0 && i<M && j<N) - { - return true; - } - else - return false; - } - template <typename Scalar,typename IndexType> - inline bool GetMarketLine (std::stringstream& line, IndexType& M, IndexType& N, IndexType& i, IndexType& j, std::complex<Scalar>& value) - { - Scalar valR, valI; - line >> i >> j >> valR >> valI; - i--; - j--; - if(i>=0 && j>=0 && i<M && j<N) - { - value = std::complex<Scalar>(valR, valI); - return true; - } - else - return false; - } - - template <typename RealScalar> - inline void GetVectorElt (const std::string& line, RealScalar& val) - { - std::istringstream newline(line); - newline >> val; - } - - template <typename RealScalar> - inline void GetVectorElt (const std::string& line, std::complex<RealScalar>& val) - { - RealScalar valR, valI; - std::istringstream newline(line); - newline >> valR >> valI; - val = std::complex<RealScalar>(valR, valI); - } - - template<typename Scalar> - inline void putMarketHeader(std::string& header,int sym) - { - header= "%%MatrixMarket matrix coordinate "; - if(internal::is_same<Scalar, std::complex<float> >::value || internal::is_same<Scalar, std::complex<double> >::value) - { - header += " complex"; - if(sym == Symmetric) header += " symmetric"; - else if (sym == SelfAdjoint) header += " Hermitian"; - else header += " general"; - } - else - { - header += " real"; - if(sym == Symmetric) header += " symmetric"; - else header += " general"; - } - } - - template<typename Scalar> - inline void PutMatrixElt(Scalar value, int row, int col, std::ofstream& out) - { - out << row << " "<< col << " " << value << "\n"; - } - template<typename Scalar> - inline void PutMatrixElt(std::complex<Scalar> value, int row, int col, std::ofstream& out) - { - out << row << " " << col << " " << value.real() << " " << value.imag() << "\n"; - } - - - template<typename Scalar> - inline void putVectorElt(Scalar value, std::ofstream& out) - { - out << value << "\n"; - } - template<typename Scalar> - inline void putVectorElt(std::complex<Scalar> value, std::ofstream& out) - { - out << value.real() << " " << value.imag() << "\n"; - } - -} // end namespace internal - -inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector) -{ - sym = 0; - iscomplex = false; - isvector = false; - std::ifstream in(filename.c_str(),std::ios::in); - if(!in) - return false; - - std::string line; - // The matrix header is always the first line in the file - std::getline(in, line); eigen_assert(in.good()); - - std::stringstream fmtline(line); - std::string substr[5]; - fmtline>> substr[0] >> substr[1] >> substr[2] >> substr[3] >> substr[4]; - if(substr[2].compare("array") == 0) isvector = true; - if(substr[3].compare("complex") == 0) iscomplex = true; - if(substr[4].compare("symmetric") == 0) sym = Symmetric; - else if (substr[4].compare("Hermitian") == 0) sym = SelfAdjoint; - - return true; -} - -template<typename SparseMatrixType> -bool loadMarket(SparseMatrixType& mat, const std::string& filename) -{ - typedef typename SparseMatrixType::Scalar Scalar; - typedef typename SparseMatrixType::StorageIndex StorageIndex; - std::ifstream input(filename.c_str(),std::ios::in); - if(!input) - 
return false; - - const int maxBuffersize = 2048; - char buffer[maxBuffersize]; - - bool readsizes = false; - - typedef Triplet<Scalar,StorageIndex> T; - std::vector<T> elements; - - StorageIndex M(-1), N(-1), NNZ(-1); - StorageIndex count = 0; - while(input.getline(buffer, maxBuffersize)) - { - // skip comments - //NOTE An appropriate test should be done on the header to get the symmetry - if(buffer[0]=='%') - continue; - - std::stringstream line(buffer); - - if(!readsizes) - { - line >> M >> N >> NNZ; - if(M > 0 && N > 0 && NNZ > 0) - { - readsizes = true; - //std::cout << "sizes: " << M << "," << N << "," << NNZ << "\n"; - mat.resize(M,N); - mat.reserve(NNZ); - } - } - else - { - StorageIndex i(-1), j(-1); - Scalar value; - if( internal::GetMarketLine(line, M, N, i, j, value) ) - { - ++ count; - elements.push_back(T(i,j,value)); - } - else - std::cerr << "Invalid read: " << i << "," << j << "\n"; - } - } - mat.setFromTriplets(elements.begin(), elements.end()); - if(count!=NNZ) - std::cerr << count << "!=" << NNZ << "\n"; - - input.close(); - return true; -} - -template<typename VectorType> -bool loadMarketVector(VectorType& vec, const std::string& filename) -{ - typedef typename VectorType::Scalar Scalar; - std::ifstream in(filename.c_str(), std::ios::in); - if(!in) - return false; - - std::string line; - int n(0), col(0); - do - { // Skip comments - std::getline(in, line); eigen_assert(in.good()); - } while (line[0] == '%'); - std::istringstream newline(line); - newline >> n >> col; - eigen_assert(n>0 && col>0); - vec.resize(n); - int i = 0; - Scalar value; - while ( std::getline(in, line) && (i < n) ){ - internal::GetVectorElt(line, value); - vec(i++) = value; - } - in.close(); - if (i!=n){ - std::cerr<< "Unable to read all elements from file " << filename << "\n"; - return false; - } - return true; -} - -template<typename SparseMatrixType> -bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sym = 0) -{ - typedef typename SparseMatrixType::Scalar Scalar; - std::ofstream out(filename.c_str(),std::ios::out); - if(!out) - return false; - - out.flags(std::ios_base::scientific); - out.precision(64); - std::string header; - internal::putMarketHeader<Scalar>(header, sym); - out << header << std::endl; - out << mat.rows() << " " << mat.cols() << " " << mat.nonZeros() << "\n"; - int count = 0; - for(int j=0; j<mat.outerSize(); ++j) - for(typename SparseMatrixType::InnerIterator it(mat,j); it; ++it) - { - ++ count; - internal::PutMatrixElt(it.value(), it.row()+1, it.col()+1, out); - // out << it.row()+1 << " " << it.col()+1 << " " << it.value() << "\n"; - } - out.close(); - return true; -} - -template<typename VectorType> -bool saveMarketVector (const VectorType& vec, const std::string& filename) -{ - typedef typename VectorType::Scalar Scalar; - std::ofstream out(filename.c_str(),std::ios::out); - if(!out) - return false; - - out.flags(std::ios_base::scientific); - out.precision(64); - if(internal::is_same<Scalar, std::complex<float> >::value || internal::is_same<Scalar, std::complex<double> >::value) - out << "%%MatrixMarket matrix array complex general\n"; - else - out << "%%MatrixMarket matrix array real general\n"; - out << vec.size() << " "<< 1 << "\n"; - for (int i=0; i < vec.size(); i++){ - internal::putVectorElt(vec(i), out); - } - out.close(); - return true; -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_MARKET_IO_H diff --git a/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h 
b/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h deleted file mode 100644 index 02916ea..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h +++ /dev/null @@ -1,247 +0,0 @@ - -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BROWSE_MATRICES_H -#define EIGEN_BROWSE_MATRICES_H - -namespace Eigen { - -enum { - SPD = 0x100, - NonSymmetric = 0x0 -}; - -/** - * @brief Iterator to browse matrices from a specified folder - * - * This is used to load all the matrices from a folder. - * The matrices should be in Matrix Market format - * It is assumed that the matrices are named as matname.mtx - * and matname_SPD.mtx if the matrix is Symmetric and positive definite (or Hermitian) - * The right hand side vectors are loaded as well, if they exist. - * They should be named as matname_b.mtx. - * Note that the right hand side for a SPD matrix is named as matname_SPD_b.mtx - * - * Sometimes a reference solution is available. In this case, it should be named as matname_x.mtx - * - * Sample code - * \code - * - * \endcode - * - * \tparam Scalar The scalar type - */ -template <typename Scalar> -class MatrixMarketIterator -{ - typedef typename NumTraits<Scalar>::Real RealScalar; - public: - typedef Matrix<Scalar,Dynamic,1> VectorType; - typedef SparseMatrix<Scalar,ColMajor> MatrixType; - - public: - MatrixMarketIterator(const std::string &folder) - : m_sym(0), m_isvalid(false), m_matIsLoaded(false), m_hasRhs(false), m_hasrefX(false), m_folder(folder) - { - m_folder_id = opendir(folder.c_str()); - if(m_folder_id) - Getnextvalidmatrix(); - } - - ~MatrixMarketIterator() - { - if (m_folder_id) closedir(m_folder_id); - } - - inline MatrixMarketIterator& operator++() - { - m_matIsLoaded = false; - m_hasrefX = false; - m_hasRhs = false; - Getnextvalidmatrix(); - return *this; - } - inline operator bool() const { return m_isvalid;} - - /** Return the sparse matrix corresponding to the current file */ - inline MatrixType& matrix() - { - // Read the matrix - if (m_matIsLoaded) return m_mat; - - std::string matrix_file = m_folder + "/" + m_matname + ".mtx"; - if ( !loadMarket(m_mat, matrix_file)) - { - std::cerr << "Warning loadMarket failed when loading \"" << matrix_file << "\"" << std::endl; - m_matIsLoaded = false; - return m_mat; - } - m_matIsLoaded = true; - - if (m_sym != NonSymmetric) - { - // Check whether we need to restore a full matrix: - RealScalar diag_norm = m_mat.diagonal().norm(); - RealScalar lower_norm = m_mat.template triangularView<Lower>().norm(); - RealScalar upper_norm = m_mat.template triangularView<Upper>().norm(); - if(lower_norm>diag_norm && upper_norm==diag_norm) - { - // only the lower part is stored - MatrixType tmp(m_mat); - m_mat = tmp.template selfadjointView<Lower>(); - } - else if(upper_norm>diag_norm && lower_norm==diag_norm) - { - // only the upper part is stored - MatrixType tmp(m_mat); - m_mat = tmp.template selfadjointView<Upper>(); - } - } - return m_mat; - } - - /** Return the right hand side corresponding to the current matrix. 
- * If the rhs file is not provided, a random rhs is generated - */ - inline VectorType& rhs() - { - // Get the right hand side - if (m_hasRhs) return m_rhs; - - std::string rhs_file; - rhs_file = m_folder + "/" + m_matname + "_b.mtx"; // The pattern is matname_b.mtx - m_hasRhs = Fileexists(rhs_file); - if (m_hasRhs) - { - m_rhs.resize(m_mat.cols()); - m_hasRhs = loadMarketVector(m_rhs, rhs_file); - } - if (!m_hasRhs) - { - // Generate a random right hand side - if (!m_matIsLoaded) this->matrix(); - m_refX.resize(m_mat.cols()); - m_refX.setRandom(); - m_rhs = m_mat * m_refX; - m_hasrefX = true; - m_hasRhs = true; - } - return m_rhs; - } - - /** Return a reference solution - * If it is not provided and if the right hand side is not available - * then refX is randomly generated such that A*refX = b - * where A and b are the matrix and the rhs. - * Note that when a rhs is provided, refX is not available - */ - inline VectorType& refX() - { - // Check if a reference solution is provided - if (m_hasrefX) return m_refX; - - std::string lhs_file; - lhs_file = m_folder + "/" + m_matname + "_x.mtx"; - m_hasrefX = Fileexists(lhs_file); - if (m_hasrefX) - { - m_refX.resize(m_mat.cols()); - m_hasrefX = loadMarketVector(m_refX, lhs_file); - } - else - m_refX.resize(0); - return m_refX; - } - - inline std::string& matname() { return m_matname; } - - inline int sym() { return m_sym; } - - bool hasRhs() {return m_hasRhs; } - bool hasrefX() {return m_hasrefX; } - bool isFolderValid() { return bool(m_folder_id); } - - protected: - - inline bool Fileexists(std::string file) - { - std::ifstream file_id(file.c_str()); - if (!file_id.good() ) - { - return false; - } - else - { - file_id.close(); - return true; - } - } - - void Getnextvalidmatrix( ) - { - m_isvalid = false; - // Here, we return with the next valid matrix in the folder - while ( (m_curs_id = readdir(m_folder_id)) != NULL) { - m_isvalid = false; - std::string curfile; - curfile = m_folder + "/" + m_curs_id->d_name; - // Discard if it is a folder - if (m_curs_id->d_type == DT_DIR) continue; //FIXME This may not be available on non BSD systems -// struct stat st_buf; -// stat (curfile.c_str(), &st_buf); -// if (S_ISDIR(st_buf.st_mode)) continue; - - // Determine from the header if it is a matrix or a right hand side - bool isvector,iscomplex=false; - if(!getMarketHeader(curfile,m_sym,iscomplex,isvector)) continue; - if(isvector) continue; - if (!iscomplex) - { - if(internal::is_same<Scalar, std::complex<float> >::value || internal::is_same<Scalar, std::complex<double> >::value) - continue; - } - if (iscomplex) - { - if(internal::is_same<Scalar, float>::value || internal::is_same<Scalar, double>::value) - continue; - } - - - // Get the matrix name - std::string filename = m_curs_id->d_name; - m_matname = filename.substr(0, filename.length()-4); - - // Find if the matrix is SPD - size_t found = m_matname.find("SPD"); - if( (found!=std::string::npos) && (m_sym != NonSymmetric) ) - m_sym = SPD; - - m_isvalid = true; - break; - } - } - int m_sym; // Symmetry of the matrix - MatrixType m_mat; // Current matrix - VectorType m_rhs; // Current vector - VectorType m_refX; // The reference solution, if exists - std::string m_matname; // Matrix Name - bool m_isvalid; - bool m_matIsLoaded; // Determine if the matrix has already been loaded from the file - bool m_hasRhs; // The right hand side exists - bool m_hasrefX; // A reference solution is provided - std::string m_folder; - DIR * m_folder_id; - struct dirent *m_curs_id; - -}; - -} // end namespace Eigen - 
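// [Editor's note] The "Sample code" block in the MatrixMarketIterator documentation above was
// left empty in the original header. The sketch below is not part of the removed file; it shows
// the intended folder-browsing usage. The folder path is hypothetical, and the usual
// <unsupported/Eigen/SparseExtra> module header is assumed.
#include <unsupported/Eigen/SparseExtra>
#include <iostream>

inline void browseMatrixMarketFolderSketch()
{
  Eigen::MatrixMarketIterator<double> it("/path/to/matrices");   // hypothetical folder of .mtx files
  for (; it; ++it)
  {
    Eigen::SparseMatrix<double> A = it.matrix();   // current matrix, restored to full storage if symmetric
    Eigen::VectorXd b = it.rhs();                  // matching rhs, or a random one if none is provided
    std::cout << it.matname() << ": " << A.rows() << "x" << A.cols()
              << ", nnz = " << A.nonZeros() << "\n";
  }
}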
-#endif diff --git a/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h b/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h deleted file mode 100644 index ee97299..0000000 --- a/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h +++ /dev/null @@ -1,327 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_RANDOMSETTER_H -#define EIGEN_RANDOMSETTER_H - -namespace Eigen { - -/** Represents a std::map - * - * \see RandomSetter - */ -template<typename Scalar> struct StdMapTraits -{ - typedef int KeyType; - typedef std::map<KeyType,Scalar> Type; - enum { - IsSorted = 1 - }; - - static void setInvalidKey(Type&, const KeyType&) {} -}; - -#ifdef EIGEN_UNORDERED_MAP_SUPPORT -/** Represents a std::unordered_map - * - * To use it you need to both define EIGEN_UNORDERED_MAP_SUPPORT and include the unordered_map header file - * yourself making sure that unordered_map is defined in the std namespace. - * - * For instance, with current version of gcc you can either enable C++0x standard (-std=c++0x) or do: - * \code - * #include <tr1/unordered_map> - * #define EIGEN_UNORDERED_MAP_SUPPORT - * namespace std { - * using std::tr1::unordered_map; - * } - * \endcode - * - * \see RandomSetter - */ -template<typename Scalar> struct StdUnorderedMapTraits -{ - typedef int KeyType; - typedef std::unordered_map<KeyType,Scalar> Type; - enum { - IsSorted = 0 - }; - - static void setInvalidKey(Type&, const KeyType&) {} -}; -#endif // EIGEN_UNORDERED_MAP_SUPPORT - -#ifdef _DENSE_HASH_MAP_H_ -/** Represents a google::dense_hash_map - * - * \see RandomSetter - */ -template<typename Scalar> struct GoogleDenseHashMapTraits -{ - typedef int KeyType; - typedef google::dense_hash_map<KeyType,Scalar> Type; - enum { - IsSorted = 0 - }; - - static void setInvalidKey(Type& map, const KeyType& k) - { map.set_empty_key(k); } -}; -#endif - -#ifdef _SPARSE_HASH_MAP_H_ -/** Represents a google::sparse_hash_map - * - * \see RandomSetter - */ -template<typename Scalar> struct GoogleSparseHashMapTraits -{ - typedef int KeyType; - typedef google::sparse_hash_map<KeyType,Scalar> Type; - enum { - IsSorted = 0 - }; - - static void setInvalidKey(Type&, const KeyType&) {} -}; -#endif - -/** \class RandomSetter - * - * \brief The RandomSetter is a wrapper object allowing to set/update a sparse matrix with random access - * - * \tparam SparseMatrixType the type of the sparse matrix we are updating - * \tparam MapTraits a traits class representing the map implementation used for the temporary sparse storage. - * Its default value depends on the system. - * \tparam OuterPacketBits defines the number of rows (or columns) manage by a single map object - * as a power of two exponent. - * - * This class temporarily represents a sparse matrix object using a generic map implementation allowing for - * efficient random access. The conversion from the compressed representation to a hash_map object is performed - * in the RandomSetter constructor, while the sparse matrix is updated back at destruction time. 
This strategy - * suggest the use of nested blocks as in this example: - * - * \code - * SparseMatrix<double> m(rows,cols); - * { - * RandomSetter<SparseMatrix<double> > w(m); - * // don't use m but w instead with read/write random access to the coefficients: - * for(;;) - * w(rand(),rand()) = rand; - * } - * // when w is deleted, the data are copied back to m - * // and m is ready to use. - * \endcode - * - * Since hash_map objects are not fully sorted, representing a full matrix as a single hash_map would - * involve a big and costly sort to update the compressed matrix back. To overcome this issue, a RandomSetter - * use multiple hash_map, each representing 2^OuterPacketBits columns or rows according to the storage order. - * To reach optimal performance, this value should be adjusted according to the average number of nonzeros - * per rows/columns. - * - * The possible values for the template parameter MapTraits are: - * - \b StdMapTraits: corresponds to std::map. (does not perform very well) - * - \b GnuHashMapTraits: corresponds to __gnu_cxx::hash_map (available only with GCC) - * - \b GoogleDenseHashMapTraits: corresponds to google::dense_hash_map (best efficiency, reasonable memory consumption) - * - \b GoogleSparseHashMapTraits: corresponds to google::sparse_hash_map (best memory consumption, relatively good performance) - * - * The default map implementation depends on the availability, and the preferred order is: - * GoogleSparseHashMapTraits, GnuHashMapTraits, and finally StdMapTraits. - * - * For performance and memory consumption reasons it is highly recommended to use one of - * the Google's hash_map implementation. To enable the support for them, you have two options: - * - \#include <google/dense_hash_map> yourself \b before Eigen/Sparse header - * - define EIGEN_GOOGLEHASH_SUPPORT - * In the later case the inclusion of <google/dense_hash_map> is made for you. - * - * \see http://code.google.com/p/google-sparsehash/ - */ -template<typename SparseMatrixType, - template <typename T> class MapTraits = -#if defined _DENSE_HASH_MAP_H_ - GoogleDenseHashMapTraits -#elif defined _HASH_MAP - GnuHashMapTraits -#else - StdMapTraits -#endif - ,int OuterPacketBits = 6> -class RandomSetter -{ - typedef typename SparseMatrixType::Scalar Scalar; - typedef typename SparseMatrixType::StorageIndex StorageIndex; - - struct ScalarWrapper - { - ScalarWrapper() : value(0) {} - Scalar value; - }; - typedef typename MapTraits<ScalarWrapper>::KeyType KeyType; - typedef typename MapTraits<ScalarWrapper>::Type HashMapType; - static const int OuterPacketMask = (1 << OuterPacketBits) - 1; - enum { - SwapStorage = 1 - MapTraits<ScalarWrapper>::IsSorted, - TargetRowMajor = (SparseMatrixType::Flags & RowMajorBit) ? 1 : 0, - SetterRowMajor = SwapStorage ? 1-TargetRowMajor : TargetRowMajor - }; - - public: - - /** Constructs a random setter object from the sparse matrix \a target - * - * Note that the initial value of \a target are imported. If you want to re-set - * a sparse matrix from scratch, then you must set it to zero first using the - * setZero() function. - */ - inline RandomSetter(SparseMatrixType& target) - : mp_target(&target) - { - const Index outerSize = SwapStorage ? target.innerSize() : target.outerSize(); - const Index innerSize = SwapStorage ? 
target.outerSize() : target.innerSize(); - m_outerPackets = outerSize >> OuterPacketBits; - if (outerSize&OuterPacketMask) - m_outerPackets += 1; - m_hashmaps = new HashMapType[m_outerPackets]; - // compute number of bits needed to store inner indices - Index aux = innerSize - 1; - m_keyBitsOffset = 0; - while (aux) - { - ++m_keyBitsOffset; - aux = aux >> 1; - } - KeyType ik = (1<<(OuterPacketBits+m_keyBitsOffset)); - for (Index k=0; k<m_outerPackets; ++k) - MapTraits<ScalarWrapper>::setInvalidKey(m_hashmaps[k],ik); - - // insert current coeffs - for (Index j=0; j<mp_target->outerSize(); ++j) - for (typename SparseMatrixType::InnerIterator it(*mp_target,j); it; ++it) - (*this)(TargetRowMajor?j:it.index(), TargetRowMajor?it.index():j) = it.value(); - } - - /** Destructor updating back the sparse matrix target */ - ~RandomSetter() - { - KeyType keyBitsMask = (1<<m_keyBitsOffset)-1; - if (!SwapStorage) // also means the map is sorted - { - mp_target->setZero(); - mp_target->makeCompressed(); - mp_target->reserve(nonZeros()); - Index prevOuter = -1; - for (Index k=0; k<m_outerPackets; ++k) - { - const Index outerOffset = (1<<OuterPacketBits) * k; - typename HashMapType::iterator end = m_hashmaps[k].end(); - for (typename HashMapType::iterator it = m_hashmaps[k].begin(); it!=end; ++it) - { - const Index outer = (it->first >> m_keyBitsOffset) + outerOffset; - const Index inner = it->first & keyBitsMask; - if (prevOuter!=outer) - { - for (Index j=prevOuter+1;j<=outer;++j) - mp_target->startVec(j); - prevOuter = outer; - } - mp_target->insertBackByOuterInner(outer, inner) = it->second.value; - } - } - mp_target->finalize(); - } - else - { - VectorXi positions(mp_target->outerSize()); - positions.setZero(); - // pass 1 - for (Index k=0; k<m_outerPackets; ++k) - { - typename HashMapType::iterator end = m_hashmaps[k].end(); - for (typename HashMapType::iterator it = m_hashmaps[k].begin(); it!=end; ++it) - { - const Index outer = it->first & keyBitsMask; - ++positions[outer]; - } - } - // prefix sum - Index count = 0; - for (Index j=0; j<mp_target->outerSize(); ++j) - { - Index tmp = positions[j]; - mp_target->outerIndexPtr()[j] = count; - positions[j] = count; - count += tmp; - } - mp_target->makeCompressed(); - mp_target->outerIndexPtr()[mp_target->outerSize()] = count; - mp_target->resizeNonZeros(count); - // pass 2 - for (Index k=0; k<m_outerPackets; ++k) - { - const Index outerOffset = (1<<OuterPacketBits) * k; - typename HashMapType::iterator end = m_hashmaps[k].end(); - for (typename HashMapType::iterator it = m_hashmaps[k].begin(); it!=end; ++it) - { - const Index inner = (it->first >> m_keyBitsOffset) + outerOffset; - const Index outer = it->first & keyBitsMask; - // sorted insertion - // Note that we have to deal with at most 2^OuterPacketBits unsorted coefficients, - // moreover those 2^OuterPacketBits coeffs are likely to be sparse, an so only a - // small fraction of them have to be sorted, whence the following simple procedure: - Index posStart = mp_target->outerIndexPtr()[outer]; - Index i = (positions[outer]++) - 1; - while ( (i >= posStart) && (mp_target->innerIndexPtr()[i] > inner) ) - { - mp_target->valuePtr()[i+1] = mp_target->valuePtr()[i]; - mp_target->innerIndexPtr()[i+1] = mp_target->innerIndexPtr()[i]; - --i; - } - mp_target->innerIndexPtr()[i+1] = inner; - mp_target->valuePtr()[i+1] = it->second.value; - } - } - } - delete[] m_hashmaps; - } - - /** \returns a reference to the coefficient at given coordinates \a row, \a col */ - Scalar& operator() (Index row, Index col) - { - 
const Index outer = SetterRowMajor ? row : col; - const Index inner = SetterRowMajor ? col : row; - const Index outerMajor = outer >> OuterPacketBits; // index of the packet/map - const Index outerMinor = outer & OuterPacketMask; // index of the inner vector in the packet - const KeyType key = internal::convert_index<KeyType>((outerMinor<<m_keyBitsOffset) | inner); - return m_hashmaps[outerMajor][key].value; - } - - /** \returns the number of non zero coefficients - * - * \note According to the underlying map/hash_map implementation, - * this function might be quite expensive. - */ - Index nonZeros() const - { - Index nz = 0; - for (Index k=0; k<m_outerPackets; ++k) - nz += static_cast<Index>(m_hashmaps[k].size()); - return nz; - } - - - protected: - - HashMapType* m_hashmaps; - SparseMatrixType* mp_target; - Index m_outerPackets; - unsigned char m_keyBitsOffset; -}; - -} // end namespace Eigen - -#endif // EIGEN_RANDOMSETTER_H diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h deleted file mode 100644 index ed415db..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h +++ /dev/null @@ -1,124 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H -#define EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H - -namespace Eigen { - -/** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. - * - * This function computes the coefficient-wise incomplete gamma function. - * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar - * type T to be supported. - * - * \sa Eigen::igammac(), Eigen::lgamma() - */ -template<typename Derived,typename ExponentDerived> -inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived> -igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) -{ - return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( - a.derived(), - x.derived() - ); -} - -/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. - * - * This function computes the coefficient-wise complementary incomplete gamma function. - * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar - * type T to be supported. 
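As a side note (not part of the original header), here is a minimal sketch of how the coefficient-wise igamma/igammac wrappers defined in this file can be used; it assumes C++11 and the unsupported SpecialFunctions module header, and the array values are arbitrary illustrations:

\code
#include <unsupported/Eigen/SpecialFunctions>
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd a(3), x(3);
  a << 0.5, 2.0, 5.0;   // shape parameters, must be > 0
  x << 1.0, 2.0, 3.0;   // evaluation points, must be >= 0

  // Coefficient-wise incomplete and complementary incomplete gamma functions.
  Eigen::ArrayXd p = Eigen::igamma(a, x);
  Eigen::ArrayXd q = Eigen::igammac(a, x);

  // As documented for the Cephes implementation below, igammac(a,x) = 1 - igamma(a,x).
  std::cout << (p + q).transpose() << std::endl;   // prints 1 1 1
  return 0;
}
\endcode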
- * - * \sa Eigen::igamma(), Eigen::lgamma() - */ -template<typename Derived,typename ExponentDerived> -inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived> -igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) -{ - return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>( - a.derived(), - x.derived() - ); -} - -/** \cpp11 \returns an expression of the coefficient-wise polygamma(\a n, \a x) to the given arrays. - * - * It returns the \a n -th derivative of the digamma(psi) evaluated at \c x. - * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of polygamma(T,T) for any scalar - * type T to be supported. - * - * \sa Eigen::digamma() - */ -// * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x) -// * \sa ArrayBase::polygamma() -template<typename DerivedN,typename DerivedX> -inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX> -polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>& x) -{ - return Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>( - n.derived(), - x.derived() - ); -} - -/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given arrays. - * - * This function computes the regularized incomplete beta function (integral). - * - * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, - * or float/double in non c++11 mode, the user has to provide implementations of betainc(T,T,T) for any scalar - * type T to be supported. - * - * \sa Eigen::betainc(), Eigen::lgamma() - */ -template<typename ArgADerived, typename ArgBDerived, typename ArgXDerived> -inline const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived> -betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDerived>& b, const Eigen::ArrayBase<ArgXDerived>& x) -{ - return Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>( - a.derived(), - b.derived(), - x.derived() - ); -} - - -/** \returns an expression of the coefficient-wise zeta(\a x, \a q) to the given arrays. - * - * It returns the Riemann zeta function of two arguments \a x and \a q: - * - * \param x is the exposent, it must be > 1 - * \param q is the shift, it must be > 0 - * - * \note This function supports only float and double scalar types. To support other scalar types, the user has - * to provide implementations of zeta(T,T) for any scalar type T to be supported. 
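In the same illustrative spirit (again not from the original file, with made-up values), the remaining coefficient-wise wrappers of this header can be exercised as follows, respecting the argument order and domain restrictions documented above:

\code
#include <unsupported/Eigen/SpecialFunctions>
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd n(2), x(2), q(2), a(2), b(2), t(2);
  n << 0.0, 1.0;                                    // derivative order for polygamma (integer valued)
  x << 2.0, 3.0;                                    // exponent for zeta, must be > 1
  q << 1.0, 0.5;                                    // shift for zeta, must be > 0
  a << 2.0, 3.0;  b << 3.0, 4.0;  t << 0.25, 0.75;  // betainc arguments, with 0 <= t <= 1

  std::cout << Eigen::polygamma(n, x).transpose()  << "\n"   // n-th derivative of digamma at x
            << Eigen::zeta(x, q).transpose()       << "\n"   // sum over k of (k+q)^(-x)
            << Eigen::betainc(a, b, t).transpose() << std::endl;  // regularized incomplete beta
  return 0;
}
\endcode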
- * - * \sa ArrayBase::zeta() - */ -template<typename DerivedX,typename DerivedQ> -inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ> -zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q) -{ - return Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>( - x.derived(), - q.derived() - ); -} - -} // end namespace Eigen - -#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h deleted file mode 100644 index d8f2363..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ /dev/null @@ -1,236 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com> -// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPECIALFUNCTIONS_FUNCTORS_H -#define EIGEN_SPECIALFUNCTIONS_FUNCTORS_H - -namespace Eigen { - -namespace internal { - - -/** \internal - * \brief Template functor to compute the incomplete gamma function igamma(a, x) - * - * \sa class CwiseBinaryOp, Cwise::igamma - */ -template<typename Scalar> struct scalar_igamma_op : binary_op_base<Scalar,Scalar> -{ - EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { - using numext::igamma; return igamma(a, x); - } - template<typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { - return internal::pigamma(a, x); - } -}; -template<typename Scalar> -struct functor_traits<scalar_igamma_op<Scalar> > { - enum { - // Guesstimate - Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasIGamma - }; -}; - - -/** \internal - * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) - * - * \sa class CwiseBinaryOp, Cwise::igammac - */ -template<typename Scalar> struct scalar_igammac_op : binary_op_base<Scalar,Scalar> -{ - EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { - using numext::igammac; return igammac(a, x); - } - template<typename Packet> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const - { - return internal::pigammac(a, x); - } -}; -template<typename Scalar> -struct functor_traits<scalar_igammac_op<Scalar> > { - enum { - // Guesstimate - Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasIGammac - }; -}; - - -/** \internal - * \brief Template functor to compute the incomplete beta integral betainc(a, b, x) - * - */ -template<typename Scalar> struct scalar_betainc_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_betainc_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& a, const Scalar& b) const { - using numext::betainc; return betainc(x, a, b); - } - template<typename Packet> - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& x, const Packet& a, const Packet& b) const - { - return internal::pbetainc(x, a, b); - } -}; -template<typename Scalar> -struct functor_traits<scalar_betainc_op<Scalar> > { - enum { - // Guesstimate - Cost = 400 * NumTraits<Scalar>::MulCost + 400 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasBetaInc - }; -}; - - -/** \internal - * \brief Template functor to compute the natural log of the absolute - * value of Gamma of a scalar - * \sa class CwiseUnaryOp, Cwise::lgamma() - */ -template<typename Scalar> struct scalar_lgamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::lgamma; return lgamma(a); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); } -}; -template<typename Scalar> -struct functor_traits<scalar_lgamma_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasLGamma - }; -}; - -/** \internal - * \brief Template functor to compute psi, the derivative of lgamma of a scalar. - * \sa class CwiseUnaryOp, Cwise::digamma() - */ -template<typename Scalar> struct scalar_digamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::digamma; return digamma(a); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } -}; -template<typename Scalar> -struct functor_traits<scalar_digamma_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasDiGamma - }; -}; - -/** \internal - * \brief Template functor to compute the Riemann Zeta function of two arguments. - * \sa class CwiseUnaryOp, Cwise::zeta() - */ -template<typename Scalar> struct scalar_zeta_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const { - using numext::zeta; return zeta(x, q); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); } -}; -template<typename Scalar> -struct functor_traits<scalar_zeta_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasZeta - }; -}; - -/** \internal - * \brief Template functor to compute the polygamma function. 
- * \sa class CwiseUnaryOp, Cwise::polygamma() - */ -template<typename Scalar> struct scalar_polygamma_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const { - using numext::polygamma; return polygamma(n, x); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); } -}; -template<typename Scalar> -struct functor_traits<scalar_polygamma_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasPolygamma - }; -}; - -/** \internal - * \brief Template functor to compute the Gauss error function of a - * scalar - * \sa class CwiseUnaryOp, Cwise::erf() - */ -template<typename Scalar> struct scalar_erf_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::erf; return erf(a); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); } -}; -template<typename Scalar> -struct functor_traits<scalar_erf_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasErf - }; -}; - -/** \internal - * \brief Template functor to compute the Complementary Error Function - * of a scalar - * \sa class CwiseUnaryOp, Cwise::erfc() - */ -template<typename Scalar> struct scalar_erfc_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { - using numext::erfc; return erfc(a); - } - typedef typename packet_traits<Scalar>::type Packet; - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); } -}; -template<typename Scalar> -struct functor_traits<scalar_erfc_op<Scalar> > -{ - enum { - // Guesstimate - Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, - PacketAccess = packet_traits<Scalar>::HasErfc - }; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SPECIALFUNCTIONS_FUNCTORS_H diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h deleted file mode 100644 index 553bcda..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h +++ /dev/null @@ -1,47 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
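For orientation only (this snippet is not part of the original sources), the unary functors above are what the coefficient-wise array methods referenced in the \sa tags expand to; assuming the unsupported SpecialFunctions header is included, a typical call chain looks like:

\code
#include <unsupported/Eigen/SpecialFunctions>
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.5, 2.0);

  // Each method builds a CwiseUnaryOp over the matching scalar_*_op functor above.
  std::cout << a.lgamma().transpose()  << "\n"      // scalar_lgamma_op
            << a.digamma().transpose() << "\n"      // scalar_digamma_op
            << a.erf().transpose()     << "\n"      // scalar_erf_op
            << a.erfc().transpose()    << std::endl; // scalar_erfc_op
  return 0;
}
\endcode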
- -#ifndef EIGEN_SPECIALFUNCTIONS_HALF_H -#define EIGEN_SPECIALFUNCTIONS_HALF_H - -namespace Eigen { -namespace numext { - -#if EIGEN_HAS_C99_MATH -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) { - return Eigen::half(Eigen::numext::lgamma(static_cast<float>(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) { - return Eigen::half(Eigen::numext::digamma(static_cast<float>(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) { - return Eigen::half(Eigen::numext::zeta(static_cast<float>(x), static_cast<float>(q))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) { - return Eigen::half(Eigen::numext::polygamma(static_cast<float>(n), static_cast<float>(x))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) { - return Eigen::half(Eigen::numext::erf(static_cast<float>(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) { - return Eigen::half(Eigen::numext::erfc(static_cast<float>(a))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { - return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { - return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x))); -} -template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { - return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x))); -} -#endif - -} // end namespace numext -} // end namespace Eigen - -#endif // EIGEN_SPECIALFUNCTIONS_HALF_H diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h deleted file mode 100644 index f524d71..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ /dev/null @@ -1,1565 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPECIAL_FUNCTIONS_H -#define EIGEN_SPECIAL_FUNCTIONS_H - -namespace Eigen { -namespace internal { - -// Parts of this code are based on the Cephes Math Library. -// -// Cephes Math Library Release 2.8: June, 2000 -// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier -// -// Permission has been kindly provided by the original author -// to incorporate the Cephes software into the Eigen codebase: -// -// From: Stephen Moshier -// To: Eugene Brevdo -// Subject: Re: Permission to wrap several cephes functions in Eigen -// -// Hello Eugene, -// -// Thank you for writing. -// -// If your licensing is similar to BSD, the formal way that has been -// handled is simply to add a statement to the effect that you are incorporating -// the Cephes software by permission of the author. 
-// -// Good luck with your project, -// Steve - -namespace cephes { - -/* polevl (modified for Eigen) - * - * Evaluate polynomial - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N+1]; - * - * y = polevl<decltype(x), N>( x, coef); - * - * - * - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - * - * The Eigen implementation is templatized. For best speed, store - * coef as a const array (constexpr), e.g. - * - * const double coef[] = {1.0, 2.0, 3.0, ...}; - * - */ -template <typename Scalar, int N> -struct polevl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) { - EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - - return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N]; - } -}; - -template <typename Scalar> -struct polevl<Scalar, 0> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) { - return coef[0]; - } -}; - -} // end namespace cephes - -/**************************************************************************** - * Implementation of lgamma, requires C++11/C99 * - ****************************************************************************/ - -template <typename Scalar> -struct lgamma_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -template <typename Scalar> -struct lgamma_retval { - typedef Scalar type; -}; - -#if EIGEN_HAS_C99_MATH -template <> -struct lgamma_impl<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(float x) { -#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) - int signgam; - return ::lgammaf_r(x, &signgam); -#else - return ::lgammaf(x); -#endif - } -}; - -template <> -struct lgamma_impl<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(double x) { -#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) - int signgam; - return ::lgamma_r(x, &signgam); -#else - return ::lgamma(x); -#endif - } -}; -#endif - -/**************************************************************************** - * Implementation of digamma (psi), based on Cephes * - ****************************************************************************/ - -template <typename Scalar> -struct digamma_retval { - typedef Scalar type; -}; - -/* - * - * Polynomial evaluation helper for the Psi (digamma) function. - * - * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for - * input Scalar s, assuming s is above 10.0. - * - * If s is above a certain threshold for the given Scalar type, zero - * is returned. Otherwise the polynomial is evaluated with enough - * coefficients for results matching Scalar machine precision. 
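To make the reversed coefficient convention of cephes::polevl described above concrete, here is a minimal standalone sketch (not part of the original file; the helper name is made up) that evaluates the same Horner recurrence:

\code
#include <iostream>

// Cephes-style storage: coef[0] is the leading coefficient C_N and
// coef[N] is the constant term C_0, evaluated by Horner's rule.
template <typename Scalar, int N>
Scalar polevl_ref(Scalar x, const Scalar (&coef)[N + 1]) {
  Scalar y = coef[0];
  for (int i = 1; i <= N; ++i)
    y = y * x + coef[i];
  return y;
}

int main() {
  const double coef[] = {2.0, -3.0, 1.0};                      // p(x) = 2*x^2 - 3*x + 1
  std::cout << polevl_ref<double, 2>(2.0, coef) << std::endl;  // prints 3
  return 0;
}
\endcode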
- * - * - */ -template <typename Scalar> -struct digamma_impl_maybe_poly { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - - -template <> -struct digamma_impl_maybe_poly<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float s) { - const float A[] = { - -4.16666666666666666667E-3f, - 3.96825396825396825397E-3f, - -8.33333333333333333333E-3f, - 8.33333333333333333333E-2f - }; - - float z; - if (s < 1.0e8f) { - z = 1.0f / (s * s); - return z * cephes::polevl<float, 3>::run(z, A); - } else return 0.0f; - } -}; - -template <> -struct digamma_impl_maybe_poly<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double s) { - const double A[] = { - 8.33333333333333333333E-2, - -2.10927960927960927961E-2, - 7.57575757575757575758E-3, - -4.16666666666666666667E-3, - 3.96825396825396825397E-3, - -8.33333333333333333333E-3, - 8.33333333333333333333E-2 - }; - - double z; - if (s < 1.0e17) { - z = 1.0 / (s * s); - return z * cephes::polevl<double, 6>::run(z, A); - } - else return 0.0; - } -}; - -template <typename Scalar> -struct digamma_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar x) { - /* - * - * Psi (digamma) function (modified for Eigen) - * - * - * SYNOPSIS: - * - * double x, y, psi(); - * - * y = psi( x ); - * - * - * DESCRIPTION: - * - * d - - * psi(x) = -- ln | (x) - * dx - * - * is the logarithmic derivative of the gamma function. - * For integer x, - * n-1 - * - - * psi(n) = -EUL + > 1/k. - * - - * k=1 - * - * If x is negative, it is transformed to a positive argument by the - * reflection formula psi(1-x) = psi(x) + pi cot(pi x). - * For general positive x, the argument is made greater than 10 - * using the recurrence psi(x+1) = psi(x) + 1/x. - * Then the following asymptotic expansion is applied: - * - * inf. B - * - 2k - * psi(x) = log(x) - 1/2x - > ------- - * - 2k - * k=1 2k x - * - * where the B2k are Bernoulli numbers. - * - * ACCURACY (float): - * Relative error (except absolute when |psi| < 1): - * arithmetic domain # trials peak rms - * IEEE 0,30 30000 1.3e-15 1.4e-16 - * IEEE -30,0 40000 1.5e-15 2.2e-16 - * - * ACCURACY (double): - * Absolute error, relative when |psi| > 1 : - * arithmetic domain # trials peak rms - * IEEE -33,0 30000 8.2e-7 1.2e-7 - * IEEE 0,33 100000 7.3e-7 7.7e-8 - * - * ERROR MESSAGES: - * message condition value returned - * psi singularity x integer <=0 INFINITY - */ - - Scalar p, q, nz, s, w, y; - bool negative = false; - - const Scalar maxnum = NumTraits<Scalar>::infinity(); - const Scalar m_pi = Scalar(EIGEN_PI); - - const Scalar zero = Scalar(0); - const Scalar one = Scalar(1); - const Scalar half = Scalar(0.5); - nz = zero; - - if (x <= zero) { - negative = true; - q = x; - p = numext::floor(q); - if (p == q) { - return maxnum; - } - /* Remove the zeros of tan(m_pi x) - * by subtracting the nearest integer from x - */ - nz = q - p; - if (nz != half) { - if (nz > half) { - p += one; - nz = q - p; - } - nz = m_pi / numext::tan(m_pi * nz); - } - else { - nz = zero; - } - x = one - x; - } - - /* use the recurrence psi(x+1) = psi(x) + 1/x. */ - s = x; - w = zero; - while (s < Scalar(10)) { - w += one / s; - s += one; - } - - y = digamma_impl_maybe_poly<Scalar>::run(s); - - y = numext::log(s) - (half / s) - y - w; - - return (negative) ? 
y - nz : y; - } -}; - -/**************************************************************************** - * Implementation of erf, requires C++11/C99 * - ****************************************************************************/ - -template <typename Scalar> -struct erf_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -template <typename Scalar> -struct erf_retval { - typedef Scalar type; -}; - -#if EIGEN_HAS_C99_MATH -template <> -struct erf_impl<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } -}; - -template <> -struct erf_impl<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } -}; -#endif // EIGEN_HAS_C99_MATH - -/*************************************************************************** -* Implementation of erfc, requires C++11/C99 * -****************************************************************************/ - -template <typename Scalar> -struct erfc_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -template <typename Scalar> -struct erfc_retval { - typedef Scalar type; -}; - -#if EIGEN_HAS_C99_MATH -template <> -struct erfc_impl<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } -}; - -template <> -struct erfc_impl<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } -}; -#endif // EIGEN_HAS_C99_MATH - -/************************************************************************************************************** - * Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 * - **************************************************************************************************************/ - -template <typename Scalar> -struct igammac_retval { - typedef Scalar type; -}; - -// NOTE: cephes_helper is also used to implement zeta -template <typename Scalar> -struct cephes_helper { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; } -}; - -template <> -struct cephes_helper<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float machep() { - return NumTraits<float>::epsilon() / 2; // 1.0 - machep == 1.0 - } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float big() { - // use epsneg (1.0 - epsneg == 1.0) - return 1.0f / (NumTraits<float>::epsilon() / 2); - } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float biginv() { - // epsneg - return machep(); - } -}; - -template <> -struct cephes_helper<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double machep() { - return NumTraits<double>::epsilon() / 2; // 1.0 - machep == 1.0 - } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double big() { - return 1.0 / NumTraits<double>::epsilon(); - } - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double biginv() { - // inverse of eps - return 
NumTraits<double>::epsilon(); - } -}; - -#if !EIGEN_HAS_C99_MATH - -template <typename Scalar> -struct igammac_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar a, Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -#else - -template <typename Scalar> struct igamma_impl; // predeclare igamma_impl - -template <typename Scalar> -struct igammac_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar a, Scalar x) { - /* igamc() - * - * Incomplete gamma integral (modified for Eigen) - * - * - * - * SYNOPSIS: - * - * double a, x, y, igamc(); - * - * y = igamc( a, x ); - * - * DESCRIPTION: - * - * The function is defined by - * - * - * igamc(a,x) = 1 - igam(a,x) - * - * inf. - * - - * 1 | | -t a-1 - * = ----- | e t dt. - * - | | - * | (a) - - * x - * - * - * In this implementation both arguments must be positive. - * The integral is evaluated by either a power series or - * continued fraction expansion, depending on the relative - * values of a and x. - * - * ACCURACY (float): - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 30000 7.8e-6 5.9e-7 - * - * - * ACCURACY (double): - * - * Tested at random a, x. - * a x Relative error: - * arithmetic domain domain # trials peak rms - * IEEE 0.5,100 0,100 200000 1.9e-14 1.7e-15 - * IEEE 0.01,0.5 0,100 200000 1.4e-13 1.6e-15 - * - */ - /* - Cephes Math Library Release 2.2: June, 1992 - Copyright 1985, 1987, 1992 by Stephen L. Moshier - Direct inquiries to 30 Frost Street, Cambridge, MA 02140 - */ - const Scalar zero = 0; - const Scalar one = 1; - const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - - if ((x < zero) || (a <= zero)) { - // domain error - return nan; - } - - if ((x < one) || (x < a)) { - /* The checks above ensure that we meet the preconditions for - * igamma_impl::Impl(), so call it, rather than igamma_impl::Run(). - * Calling Run() would also work, but in that case the compiler may not be - * able to prove that igammac_impl::Run and igamma_impl::Run are not - * mutually recursive. This leads to worse code, particularly on - * platforms like nvptx, where recursion is allowed only begrudgingly. - */ - return (one - igamma_impl<Scalar>::Impl(a, x)); - } - - return Impl(a, x); - } - - private: - /* igamma_impl calls igammac_impl::Impl. */ - friend struct igamma_impl<Scalar>; - - /* Actually computes igamc(a, x). 
- * - * Preconditions: - * a > 0 - * x >= 1 - * x >= a - */ - EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { - const Scalar zero = 0; - const Scalar one = 1; - const Scalar two = 2; - const Scalar machep = cephes_helper<Scalar>::machep(); - const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); - const Scalar big = cephes_helper<Scalar>::big(); - const Scalar biginv = cephes_helper<Scalar>::biginv(); - const Scalar inf = NumTraits<Scalar>::infinity(); - - Scalar ans, ax, c, yc, r, t, y, z; - Scalar pk, pkm1, pkm2, qk, qkm1, qkm2; - - if (x == inf) return zero; // std::isinf crashes on CUDA - - /* Compute x**a * exp(-x) / gamma(a) */ - ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a); - if (ax < -maxlog) { // underflow - return zero; - } - ax = numext::exp(ax); - - // continued fraction - y = one - a; - z = x + y + one; - c = zero; - pkm2 = one; - qkm2 = x; - pkm1 = x + one; - qkm1 = z * x; - ans = pkm1 / qkm1; - - while (true) { - c += one; - y += one; - z += two; - yc = y * c; - pk = pkm1 * z - pkm2 * yc; - qk = qkm1 * z - qkm2 * yc; - if (qk != zero) { - r = pk / qk; - t = numext::abs((ans - r) / r); - ans = r; - } else { - t = one; - } - pkm2 = pkm1; - pkm1 = pk; - qkm2 = qkm1; - qkm1 = qk; - if (numext::abs(pk) > big) { - pkm2 *= biginv; - pkm1 *= biginv; - qkm2 *= biginv; - qkm1 *= biginv; - } - if (t <= machep) { - break; - } - } - - return (ans * ax); - } -}; - -#endif // EIGEN_HAS_C99_MATH - -/************************************************************************************************ - * Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 * - ************************************************************************************************/ - -template <typename Scalar> -struct igamma_retval { - typedef Scalar type; -}; - -#if !EIGEN_HAS_C99_MATH - -template <typename Scalar> -struct igamma_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -#else - -template <typename Scalar> -struct igamma_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar a, Scalar x) { - /* igam() - * Incomplete gamma integral - * - * - * - * SYNOPSIS: - * - * double a, x, y, igam(); - * - * y = igam( a, x ); - * - * DESCRIPTION: - * - * The function is defined by - * - * x - * - - * 1 | | -t a-1 - * igam(a,x) = ----- | e t dt. - * - | | - * | (a) - - * 0 - * - * - * In this implementation both arguments must be positive. - * The integral is evaluated by either a power series or - * continued fraction expansion, depending on the relative - * values of a and x. - * - * ACCURACY (double): - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 200000 3.6e-14 2.9e-15 - * IEEE 0,100 300000 9.9e-14 1.5e-14 - * - * - * ACCURACY (float): - * - * Relative error: - * arithmetic domain # trials peak rms - * IEEE 0,30 20000 7.8e-6 5.9e-7 - * - */ - /* - Cephes Math Library Release 2.2: June, 1992 - Copyright 1985, 1987, 1992 by Stephen L. Moshier - Direct inquiries to 30 Frost Street, Cambridge, MA 02140 - */ - - - /* left tail of incomplete gamma function: - * - * inf. 
k - * a -x - x - * x e > ---------- - * - - - * k=0 | (a+k+1) - * - */ - const Scalar zero = 0; - const Scalar one = 1; - const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - - if (x == zero) return zero; - - if ((x < zero) || (a <= zero)) { // domain error - return nan; - } - - if ((x > one) && (x > a)) { - /* The checks above ensure that we meet the preconditions for - * igammac_impl::Impl(), so call it, rather than igammac_impl::Run(). - * Calling Run() would also work, but in that case the compiler may not be - * able to prove that igammac_impl::Run and igamma_impl::Run are not - * mutually recursive. This leads to worse code, particularly on - * platforms like nvptx, where recursion is allowed only begrudgingly. - */ - return (one - igammac_impl<Scalar>::Impl(a, x)); - } - - return Impl(a, x); - } - - private: - /* igammac_impl calls igamma_impl::Impl. */ - friend struct igammac_impl<Scalar>; - - /* Actually computes igam(a, x). - * - * Preconditions: - * x > 0 - * a > 0 - * !(x > 1 && x > a) - */ - EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { - const Scalar zero = 0; - const Scalar one = 1; - const Scalar machep = cephes_helper<Scalar>::machep(); - const Scalar maxlog = numext::log(NumTraits<Scalar>::highest()); - - Scalar ans, ax, c, r; - - /* Compute x**a * exp(-x) / gamma(a) */ - ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a); - if (ax < -maxlog) { - // underflow - return zero; - } - ax = numext::exp(ax); - - /* power series */ - r = a; - c = one; - ans = one; - - while (true) { - r += one; - c *= x/r; - ans += c; - if (c/ans <= machep) { - break; - } - } - - return (ans * ax / a); - } -}; - -#endif // EIGEN_HAS_C99_MATH - -/***************************************************************************** - * Implementation of Riemann zeta function of two arguments, based on Cephes * - *****************************************************************************/ - -template <typename Scalar> -struct zeta_retval { - typedef Scalar type; -}; - -template <typename Scalar> -struct zeta_impl_series { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -template <> -struct zeta_impl_series<float> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) { - int i = 0; - while(i < 9) - { - i += 1; - a += 1.0f; - b = numext::pow( a, -x ); - s += b; - if( numext::abs(b/s) < machep ) - return true; - } - - //Return whether we are done - return false; - } -}; - -template <> -struct zeta_impl_series<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) { - int i = 0; - while( (i < 9) || (a <= 9.0) ) - { - i += 1; - a += 1.0; - b = numext::pow( a, -x ); - s += b; - if( numext::abs(b/s) < machep ) - return true; - } - - //Return whether we are done - return false; - } -}; - -template <typename Scalar> -struct zeta_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar x, Scalar q) { - /* zeta.c - * - * Riemann zeta function of two arguments - * - * - * - * SYNOPSIS: - * - * double x, q, y, zeta(); - * - * y = zeta( x, q ); - * - * - * - * DESCRIPTION: - * - * - * - * inf. - * - -x - * zeta(x,q) = > (k+q) - * - - * k=0 - * - * where x > 1 and q is not a negative integer or zero. 
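Transcribed from the ASCII-art definition just above (no new material), the two-argument zeta function computed by this routine is, in standard notation,

\[
\zeta(x, q) \;=\; \sum_{k=0}^{\infty} (k+q)^{-x}, \qquad x > 1,\ q \text{ not a non-positive integer},
\]

which the code below evaluates via the Euler-Maclaurin expansion.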
- * The Euler-Maclaurin summation formula is used to obtain - * the expansion - * - * n - * - -x - * zeta(x,q) = > (k+q) - * - - * k=1 - * - * 1-x inf. B x(x+1)...(x+2j) - * (n+q) 1 - 2j - * + --------- - ------- + > -------------------- - * x-1 x - x+2j+1 - * 2(n+q) j=1 (2j)! (n+q) - * - * where the B2j are Bernoulli numbers. Note that (see zetac.c) - * zeta(x,1) = zetac(x) + 1. - * - * - * - * ACCURACY: - * - * Relative error for single precision: - * arithmetic domain # trials peak rms - * IEEE 0,25 10000 6.9e-7 1.0e-7 - * - * Large arguments may produce underflow in powf(), in which - * case the results are inaccurate. - * - * REFERENCE: - * - * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals, - * Series, and Products, p. 1073; Academic Press, 1980. - * - */ - - int i; - Scalar p, r, a, b, k, s, t, w; - - const Scalar A[] = { - Scalar(12.0), - Scalar(-720.0), - Scalar(30240.0), - Scalar(-1209600.0), - Scalar(47900160.0), - Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/ - Scalar(7.47242496e10), - Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/ - Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/ - Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/ - Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/ - Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/ - }; - - const Scalar maxnum = NumTraits<Scalar>::infinity(); - const Scalar zero = 0.0, half = 0.5, one = 1.0; - const Scalar machep = cephes_helper<Scalar>::machep(); - const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - - if( x == one ) - return maxnum; - - if( x < one ) - { - return nan; - } - - if( q <= zero ) - { - if(q == numext::floor(q)) - { - return maxnum; - } - p = x; - r = numext::floor(p); - if (p != r) - return nan; - } - - /* Permit negative q but continue sum until n+q > +9 . - * This case should be handled by a reflection formula. - * If q<0 and x is an integer, there is a relation to - * the polygamma function. 
- */ - s = numext::pow( q, -x ); - a = q; - b = zero; - // Run the summation in a helper function that is specific to the floating precision - if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) { - return s; - } - - w = a; - s += b*w/(x-one); - s -= half * b; - a = one; - k = zero; - for( i=0; i<12; i++ ) - { - a *= x + k; - b /= w; - t = a*b/A[i]; - s = s + t; - t = numext::abs(t/s); - if( t < machep ) { - break; - } - k += one; - a *= x + k; - b /= w; - k += one; - } - return s; - } -}; - -/**************************************************************************** - * Implementation of polygamma function, requires C++11/C99 * - ****************************************************************************/ - -template <typename Scalar> -struct polygamma_retval { - typedef Scalar type; -}; - -#if !EIGEN_HAS_C99_MATH - -template <typename Scalar> -struct polygamma_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -#else - -template <typename Scalar> -struct polygamma_impl { - EIGEN_DEVICE_FUNC - static Scalar run(Scalar n, Scalar x) { - Scalar zero = 0.0, one = 1.0; - Scalar nplus = n + one; - const Scalar nan = NumTraits<Scalar>::quiet_NaN(); - - // Check that n is an integer - if (numext::floor(n) != n) { - return nan; - } - // Just return the digamma function for n = 1 - else if (n == zero) { - return digamma_impl<Scalar>::run(x); - } - // Use the same implementation as scipy - else { - Scalar factorial = numext::exp(lgamma_impl<Scalar>::run(nplus)); - return numext::pow(-one, nplus) * factorial * zeta_impl<Scalar>::run(nplus, x); - } - } -}; - -#endif // EIGEN_HAS_C99_MATH - -/************************************************************************************************ - * Implementation of betainc (incomplete beta integral), based on Cephes but requires C++11/C99 * - ************************************************************************************************/ - -template <typename Scalar> -struct betainc_retval { - typedef Scalar type; -}; - -#if !EIGEN_HAS_C99_MATH - -template <typename Scalar> -struct betainc_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -#else - -template <typename Scalar> -struct betainc_impl { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) { - /* betaincf.c - * - * Incomplete beta integral - * - * - * SYNOPSIS: - * - * float a, b, x, y, betaincf(); - * - * y = betaincf( a, b, x ); - * - * - * DESCRIPTION: - * - * Returns incomplete beta integral of the arguments, evaluated - * from zero to x. The function is defined as - * - * x - * - - - * | (a+b) | | a-1 b-1 - * ----------- | t (1-t) dt. - * - - | | - * | (a) | (b) - - * 0 - * - * The domain of definition is 0 <= x <= 1. In this - * implementation a and b are restricted to positive values. - * The integral from x to 1 may be obtained by the symmetry - * relation - * - * 1 - betainc( a, b, x ) = betainc( b, a, 1-x ). - * - * The integral is evaluated by a continued fraction expansion. - * If a < 1, the function calls itself recursively after a - * transformation to increase a to a+1. 
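Transcribing the ASCII-art integral above into standard notation (no new content), the quantity returned is the regularized incomplete beta function

\[
\mathrm{betainc}(a, b, x) \;=\; \frac{\Gamma(a+b)}{\Gamma(a)\,\Gamma(b)} \int_{0}^{x} t^{a-1} (1-t)^{b-1} \, dt, \qquad 0 \le x \le 1,\ a, b > 0,
\]

with the symmetry relation \(1 - \mathrm{betainc}(a, b, x) = \mathrm{betainc}(b, a, 1-x)\) used to obtain the integral from x to 1.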
- * - * ACCURACY (float): - * - * Tested at random points (a,b,x) with a and b in the indicated - * interval and x between 0 and 1. - * - * arithmetic domain # trials peak rms - * Relative error: - * IEEE 0,30 10000 3.7e-5 5.1e-6 - * IEEE 0,100 10000 1.7e-4 2.5e-5 - * The useful domain for relative error is limited by underflow - * of the single precision exponential function. - * Absolute error: - * IEEE 0,30 100000 2.2e-5 9.6e-7 - * IEEE 0,100 10000 6.5e-5 3.7e-6 - * - * Larger errors may occur for extreme ratios of a and b. - * - * ACCURACY (double): - * arithmetic domain # trials peak rms - * IEEE 0,5 10000 6.9e-15 4.5e-16 - * IEEE 0,85 250000 2.2e-13 1.7e-14 - * IEEE 0,1000 30000 5.3e-12 6.3e-13 - * IEEE 0,10000 250000 9.3e-11 7.1e-12 - * IEEE 0,100000 10000 8.7e-10 4.8e-11 - * Outputs smaller than the IEEE gradual underflow threshold - * were excluded from these statistics. - * - * ERROR MESSAGES: - * message condition value returned - * incbet domain x<0, x>1 nan - * incbet underflow nan - */ - - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), - THIS_TYPE_IS_NOT_SUPPORTED); - return Scalar(0); - } -}; - -/* Continued fraction expansion #1 for incomplete beta integral (small_branch = True) - * Continued fraction expansion #2 for incomplete beta integral (small_branch = False) - */ -template <typename Scalar> -struct incbeta_cfe { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x, bool small_branch) { - EIGEN_STATIC_ASSERT((internal::is_same<Scalar, float>::value || - internal::is_same<Scalar, double>::value), - THIS_TYPE_IS_NOT_SUPPORTED); - const Scalar big = cephes_helper<Scalar>::big(); - const Scalar machep = cephes_helper<Scalar>::machep(); - const Scalar biginv = cephes_helper<Scalar>::biginv(); - - const Scalar zero = 0; - const Scalar one = 1; - const Scalar two = 2; - - Scalar xk, pk, pkm1, pkm2, qk, qkm1, qkm2; - Scalar k1, k2, k3, k4, k5, k6, k7, k8, k26update; - Scalar ans; - int n; - - const int num_iters = (internal::is_same<Scalar, float>::value) ? 100 : 300; - const Scalar thresh = - (internal::is_same<Scalar, float>::value) ? machep : Scalar(3) * machep; - Scalar r = (internal::is_same<Scalar, float>::value) ? 
zero : one; - - if (small_branch) { - k1 = a; - k2 = a + b; - k3 = a; - k4 = a + one; - k5 = one; - k6 = b - one; - k7 = k4; - k8 = a + two; - k26update = one; - } else { - k1 = a; - k2 = b - one; - k3 = a; - k4 = a + one; - k5 = one; - k6 = a + b; - k7 = a + one; - k8 = a + two; - k26update = -one; - x = x / (one - x); - } - - pkm2 = zero; - qkm2 = one; - pkm1 = one; - qkm1 = one; - ans = one; - n = 0; - - do { - xk = -(x * k1 * k2) / (k3 * k4); - pk = pkm1 + pkm2 * xk; - qk = qkm1 + qkm2 * xk; - pkm2 = pkm1; - pkm1 = pk; - qkm2 = qkm1; - qkm1 = qk; - - xk = (x * k5 * k6) / (k7 * k8); - pk = pkm1 + pkm2 * xk; - qk = qkm1 + qkm2 * xk; - pkm2 = pkm1; - pkm1 = pk; - qkm2 = qkm1; - qkm1 = qk; - - if (qk != zero) { - r = pk / qk; - if (numext::abs(ans - r) < numext::abs(r) * thresh) { - return r; - } - ans = r; - } - - k1 += one; - k2 += k26update; - k3 += two; - k4 += two; - k5 += one; - k6 -= k26update; - k7 += two; - k8 += two; - - if ((numext::abs(qk) + numext::abs(pk)) > big) { - pkm2 *= biginv; - pkm1 *= biginv; - qkm2 *= biginv; - qkm1 *= biginv; - } - if ((numext::abs(qk) < biginv) || (numext::abs(pk) < biginv)) { - pkm2 *= big; - pkm1 *= big; - qkm2 *= big; - qkm1 *= big; - } - } while (++n < num_iters); - - return ans; - } -}; - -/* Helper functions depending on the Scalar type */ -template <typename Scalar> -struct betainc_helper {}; - -template <> -struct betainc_helper<float> { - /* Core implementation, assumes a large (> 1.0) */ - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float incbsa(float aa, float bb, - float xx) { - float ans, a, b, t, x, onemx; - bool reversed_a_b = false; - - onemx = 1.0f - xx; - - /* see if x is greater than the mean */ - if (xx > (aa / (aa + bb))) { - reversed_a_b = true; - a = bb; - b = aa; - t = xx; - x = onemx; - } else { - a = aa; - b = bb; - t = onemx; - x = xx; - } - - /* Choose expansion for optimal convergence */ - if (b > 10.0f) { - if (numext::abs(b * x / a) < 0.3f) { - t = betainc_helper<float>::incbps(a, b, x); - if (reversed_a_b) t = 1.0f - t; - return t; - } - } - - ans = x * (a + b - 2.0f) / (a - 1.0f); - if (ans < 1.0f) { - ans = incbeta_cfe<float>::run(a, b, x, true /* small_branch */); - t = b * numext::log(t); - } else { - ans = incbeta_cfe<float>::run(a, b, x, false /* small_branch */); - t = (b - 1.0f) * numext::log(t); - } - - t += a * numext::log(x) + lgamma_impl<float>::run(a + b) - - lgamma_impl<float>::run(a) - lgamma_impl<float>::run(b); - t += numext::log(ans / a); - t = numext::exp(t); - - if (reversed_a_b) t = 1.0f - t; - return t; - } - - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float incbps(float a, float b, float x) { - float t, u, y, s; - const float machep = cephes_helper<float>::machep(); - - y = a * numext::log(x) + (b - 1.0f) * numext::log1p(-x) - numext::log(a); - y -= lgamma_impl<float>::run(a) + lgamma_impl<float>::run(b); - y += lgamma_impl<float>::run(a + b); - - t = x / (1.0f - x); - s = 0.0f; - u = 1.0f; - do { - b -= 1.0f; - if (b == 0.0f) { - break; - } - a += 1.0f; - u *= t * b / a; - s += u; - } while (numext::abs(u) > machep); - - return numext::exp(y) * (1.0f + s); - } -}; - -template <> -struct betainc_impl<float> { - EIGEN_DEVICE_FUNC - static float run(float a, float b, float x) { - const float nan = NumTraits<float>::quiet_NaN(); - float ans, t; - - if (a <= 0.0f) return nan; - if (b <= 0.0f) return nan; - if ((x <= 0.0f) || (x >= 1.0f)) { - if (x == 0.0f) return 0.0f; - if (x == 1.0f) return 1.0f; - // mtherr("betaincf", DOMAIN); - return nan; - } - - /* transformation for small aa */ - if 
(a <= 1.0f) { - ans = betainc_helper<float>::incbsa(a + 1.0f, b, x); - t = a * numext::log(x) + b * numext::log1p(-x) + - lgamma_impl<float>::run(a + b) - lgamma_impl<float>::run(a + 1.0f) - - lgamma_impl<float>::run(b); - return (ans + numext::exp(t)); - } else { - return betainc_helper<float>::incbsa(a, b, x); - } - } -}; - -template <> -struct betainc_helper<double> { - EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double incbps(double a, double b, double x) { - const double machep = cephes_helper<double>::machep(); - - double s, t, u, v, n, t1, z, ai; - - ai = 1.0 / a; - u = (1.0 - b) * x; - v = u / (a + 1.0); - t1 = v; - t = u; - n = 2.0; - s = 0.0; - z = machep * ai; - while (numext::abs(v) > z) { - u = (n - b) * x / n; - t *= u; - v = t / (a + n); - s += v; - n += 1.0; - } - s += t1; - s += ai; - - u = a * numext::log(x); - // TODO: gamma() is not directly implemented in Eigen. - /* - if ((a + b) < maxgam && numext::abs(u) < maxlog) { - t = gamma(a + b) / (gamma(a) * gamma(b)); - s = s * t * pow(x, a); - } else { - */ - t = lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) - - lgamma_impl<double>::run(b) + u + numext::log(s); - return s = numext::exp(t); - } -}; - -template <> -struct betainc_impl<double> { - EIGEN_DEVICE_FUNC - static double run(double aa, double bb, double xx) { - const double nan = NumTraits<double>::quiet_NaN(); - const double machep = cephes_helper<double>::machep(); - // const double maxgam = 171.624376956302725; - - double a, b, t, x, xc, w, y; - bool reversed_a_b = false; - - if (aa <= 0.0 || bb <= 0.0) { - return nan; // goto domerr; - } - - if ((xx <= 0.0) || (xx >= 1.0)) { - if (xx == 0.0) return (0.0); - if (xx == 1.0) return (1.0); - // mtherr("incbet", DOMAIN); - return nan; - } - - if ((bb * xx) <= 1.0 && xx <= 0.95) { - return betainc_helper<double>::incbps(aa, bb, xx); - } - - w = 1.0 - xx; - - /* Reverse a and b if x is greater than the mean. */ - if (xx > (aa / (aa + bb))) { - reversed_a_b = true; - a = bb; - b = aa; - xc = xx; - x = w; - } else { - a = aa; - b = bb; - xc = w; - x = xx; - } - - if (reversed_a_b && (b * x) <= 1.0 && x <= 0.95) { - t = betainc_helper<double>::incbps(a, b, x); - if (t <= machep) { - t = 1.0 - machep; - } else { - t = 1.0 - t; - } - return t; - } - - /* Choose expansion for better convergence. */ - y = x * (a + b - 2.0) - (a - 1.0); - if (y < 0.0) { - w = incbeta_cfe<double>::run(a, b, x, true /* small_branch */); - } else { - w = incbeta_cfe<double>::run(a, b, x, false /* small_branch */) / xc; - } - - /* Multiply w by the factor - a b _ _ _ - x (1-x) | (a+b) / ( a | (a) | (b) ) . */ - - y = a * numext::log(x); - t = b * numext::log(xc); - // TODO: gamma is not directly implemented in Eigen. - /* - if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog) - { - t = pow(xc, b); - t *= pow(x, a); - t /= a; - t *= w; - t *= gamma(a + b) / (gamma(a) * gamma(b)); - } else { - */ - /* Resort to logarithms. 
*/ - y += t + lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) - - lgamma_impl<double>::run(b); - y += numext::log(w / a); - t = numext::exp(y); - - /* } */ - // done: - - if (reversed_a_b) { - if (t <= machep) { - t = 1.0 - machep; - } else { - t = 1.0 - t; - } - } - return t; - } -}; - -#endif // EIGEN_HAS_C99_MATH - -} // end namespace internal - -namespace numext { - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) - lgamma(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) - digamma(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) -zeta(const Scalar& x, const Scalar& q) { - return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar) -polygamma(const Scalar& n, const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) - erf(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) - erfc(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar) - igamma(const Scalar& a, const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) - igammac(const Scalar& a, const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x); -} - -template <typename Scalar> -EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) - betainc(const Scalar& a, const Scalar& b, const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); -} - -} // end namespace numext - - -} // end namespace Eigen - -#endif // EIGEN_SPECIAL_FUNCTIONS_H diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h deleted file mode 100644 index 46d60d3..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ /dev/null @@ -1,58 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
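As a final illustration (not part of the original file), the scalar entry points defined in the numext namespace at the end of SpecialFunctionsImpl.h above can be called directly on plain floating-point values; this sketch assumes C++11 and the unsupported SpecialFunctions header:

\code
#include <unsupported/Eigen/SpecialFunctions>
#include <iostream>

int main() {
  double a = 2.0, b = 3.0, x = 0.25;
  // Each call dispatches to the corresponding *_impl struct via EIGEN_MATHFUNC_IMPL.
  std::cout << Eigen::numext::lgamma(a)        << "\n"   // log|Gamma(2)| == 0
            << Eigen::numext::digamma(a)       << "\n"
            << Eigen::numext::igamma(a, x)     << "\n"   // incomplete gamma
            << Eigen::numext::igammac(a, x)    << "\n"   // its complement
            << Eigen::numext::betainc(a, b, x) << std::endl;
  return 0;
}
\endcode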
- -#ifndef EIGEN_SPECIALFUNCTIONS_PACKETMATH_H -#define EIGEN_SPECIALFUNCTIONS_PACKETMATH_H - -namespace Eigen { - -namespace internal { - -/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } - -/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } - -/** \internal \returns the zeta function of two arguments (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); } - -/** \internal \returns the polygamma function (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } - -/** \internal \returns the erf(\a a) (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perf(const Packet& a) { using numext::erf; return erf(a); } - -/** \internal \returns the erfc(\a a) (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } - -/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */ -template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } - -/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ -template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } - -/** \internal \returns the complementary incomplete gamma function betainc(\a a, \a b, \a x) */ -template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); } - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H - diff --git a/eigen/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/eigen/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h deleted file mode 100644 index ec4fa84..0000000 --- a/eigen/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h +++ /dev/null @@ -1,165 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CUDA_SPECIALFUNCTIONS_H -#define EIGEN_CUDA_SPECIALFUNCTIONS_H - -namespace Eigen { - -namespace internal { - -// Make sure this is only available when targeting a GPU: we don't want to -// introduce conflicts between these packet_traits definitions and the ones -// we'll use on the host side (SSE, AVX, ...) 
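Before the GPU-only specializations below, note that the generic packet wrappers just removed (plgamma, pdigamma, ..., pbetainc) are what the vectorized, coefficient-wise array API of this module lowers to. A rough sketch of that user-facing API, assuming the ArrayBase methods and free functions provided by the same module:

#include <unsupported/Eigen/SpecialFunctions>
#include <Eigen/Core>
#include <iostream>

int main() {
  Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(4, 0.5, 2.0);
  Eigen::ArrayXd b = Eigen::ArrayXd::Constant(4, 1.5);
  Eigen::ArrayXd x = Eigen::ArrayXd::Constant(4, 0.25);

  // Coefficient-wise special functions; vectorized code paths go through the
  // p* packet wrappers defined in SpecialFunctionsPacketMath.h above.
  Eigen::ArrayXd lg = a.lgamma();
  Eigen::ArrayXd e  = a.erf();
  Eigen::ArrayXd ig = Eigen::igamma(a, x);       // two-argument free function
  Eigen::ArrayXd ib = Eigen::betainc(a, b, x);   // three-argument free function
  std::cout << ib.transpose() << "\n";
}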
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 plgamma<float4>(const float4& a) -{ - return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 plgamma<double2>(const double2& a) -{ - using numext::lgamma; - return make_double2(lgamma(a.x), lgamma(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pdigamma<float4>(const float4& a) -{ - using numext::digamma; - return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pdigamma<double2>(const double2& a) -{ - using numext::digamma; - return make_double2(digamma(a.x), digamma(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pzeta<float4>(const float4& x, const float4& q) -{ - using numext::zeta; - return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pzeta<double2>(const double2& x, const double2& q) -{ - using numext::zeta; - return make_double2(zeta(x.x, q.x), zeta(x.y, q.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 ppolygamma<float4>(const float4& n, const float4& x) -{ - using numext::polygamma; - return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 ppolygamma<double2>(const double2& n, const double2& x) -{ - using numext::polygamma; - return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 perf<float4>(const float4& a) -{ - return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 perf<double2>(const double2& a) -{ - using numext::erf; - return make_double2(erf(a.x), erf(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 perfc<float4>(const float4& a) -{ - using numext::erfc; - return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 perfc<double2>(const double2& a) -{ - using numext::erfc; - return make_double2(erfc(a.x), erfc(a.y)); -} - - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pigamma<float4>(const float4& a, const float4& x) -{ - using numext::igamma; - return make_float4( - igamma(a.x, x.x), - igamma(a.y, x.y), - igamma(a.z, x.z), - igamma(a.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigamma<double2>(const double2& a, const double2& x) -{ - using numext::igamma; - return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pigammac<float4>(const float4& a, const float4& x) -{ - using numext::igammac; - return make_float4( - igammac(a.x, x.x), - igammac(a.y, x.y), - igammac(a.z, x.z), - igammac(a.w, x.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pigammac<double2>(const double2& a, const double2& x) -{ - using numext::igammac; - return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pbetainc<float4>(const float4& a, const float4& b, const float4& x) -{ - using numext::betainc; - return make_float4( - betainc(a.x, b.x, x.x), - betainc(a.y, b.y, x.y), - betainc(a.z, b.z, x.z), - betainc(a.w, b.w, x.w)); -} - -template<> 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x) -{ - using numext::betainc; - return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y)); -} - -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CUDA_SPECIALFUNCTIONS_H diff --git a/eigen/unsupported/Eigen/src/Splines/Spline.h b/eigen/unsupported/Eigen/src/Splines/Spline.h deleted file mode 100644 index 57788c8..0000000 --- a/eigen/unsupported/Eigen/src/Splines/Spline.h +++ /dev/null @@ -1,507 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 20010-2011 Hauke Heibel <hauke.heibel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPLINE_H -#define EIGEN_SPLINE_H - -#include "SplineFwd.h" - -namespace Eigen -{ - /** - * \ingroup Splines_Module - * \class Spline - * \brief A class representing multi-dimensional spline curves. - * - * The class represents B-splines with non-uniform knot vectors. Each control - * point of the B-spline is associated with a basis function - * \f{align*} - * C(u) & = \sum_{i=0}^{n}N_{i,p}(u)P_i - * \f} - * - * \tparam _Scalar The underlying data type (typically float or double) - * \tparam _Dim The curve dimension (e.g. 2 or 3) - * \tparam _Degree Per default set to Dynamic; could be set to the actual desired - * degree for optimization purposes (would result in stack allocation - * of several temporary variables). - **/ - template <typename _Scalar, int _Dim, int _Degree> - class Spline - { - public: - typedef _Scalar Scalar; /*!< The spline curve's scalar type. */ - enum { Dimension = _Dim /*!< The spline curve's dimension. */ }; - enum { Degree = _Degree /*!< The spline curve's degree. */ }; - - /** \brief The point type the spline is representing. */ - typedef typename SplineTraits<Spline>::PointType PointType; - - /** \brief The data type used to store knot vectors. */ - typedef typename SplineTraits<Spline>::KnotVectorType KnotVectorType; - - /** \brief The data type used to store parameter vectors. */ - typedef typename SplineTraits<Spline>::ParameterVectorType ParameterVectorType; - - /** \brief The data type used to store non-zero basis functions. */ - typedef typename SplineTraits<Spline>::BasisVectorType BasisVectorType; - - /** \brief The data type used to store the values of the basis function derivatives. */ - typedef typename SplineTraits<Spline>::BasisDerivativeType BasisDerivativeType; - - /** \brief The data type representing the spline's control points. */ - typedef typename SplineTraits<Spline>::ControlPointVectorType ControlPointVectorType; - - /** - * \brief Creates a (constant) zero spline. - * For Splines with dynamic degree, the resulting degree will be 0. - **/ - Spline() - : m_knots(1, (Degree==Dynamic ? 2 : 2*Degree+2)) - , m_ctrls(ControlPointVectorType::Zero(Dimension,(Degree==Dynamic ? 1 : Degree+1))) - { - // in theory this code can go to the initializer list but it will get pretty - // much unreadable ... - enum { MinDegree = (Degree==Dynamic ? 0 : Degree) }; - m_knots.template segment<MinDegree+1>(0) = Array<Scalar,1,MinDegree+1>::Zero(); - m_knots.template segment<MinDegree+1>(MinDegree+1) = Array<Scalar,1,MinDegree+1>::Ones(); - } - - /** - * \brief Creates a spline from a knot vector and control points. 
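 * A minimal construction sketch (hypothetical values; with a dynamic degree
 * the knot count must equal ctrls.cols() + degree + 1, here 4 + 3 + 1 = 8):
 * \code
 * Spline2d::KnotVectorType knots(8);
 * knots << 0, 0, 0, 0, 1, 1, 1, 1;            // clamped cubic knot vector
 * Spline2d::ControlPointVectorType ctrls(2, 4);
 * ctrls << 0, 1, 2, 3,
 *          0, 1, 0, 1;                        // one control point per column
 * Spline2d spline(knots, ctrls);              // degree deduced as 3
 * \endcode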
- * \param knots The spline's knot vector. - * \param ctrls The spline's control point vector. - **/ - template <typename OtherVectorType, typename OtherArrayType> - Spline(const OtherVectorType& knots, const OtherArrayType& ctrls) : m_knots(knots), m_ctrls(ctrls) {} - - /** - * \brief Copy constructor for splines. - * \param spline The input spline. - **/ - template <int OtherDegree> - Spline(const Spline<Scalar, Dimension, OtherDegree>& spline) : - m_knots(spline.knots()), m_ctrls(spline.ctrls()) {} - - /** - * \brief Returns the knots of the underlying spline. - **/ - const KnotVectorType& knots() const { return m_knots; } - - /** - * \brief Returns the ctrls of the underlying spline. - **/ - const ControlPointVectorType& ctrls() const { return m_ctrls; } - - /** - * \brief Returns the spline value at a given site \f$u\f$. - * - * The function returns - * \f{align*} - * C(u) & = \sum_{i=0}^{n}N_{i,p}P_i - * \f} - * - * \param u Parameter \f$u \in [0;1]\f$ at which the spline is evaluated. - * \return The spline value at the given location \f$u\f$. - **/ - PointType operator()(Scalar u) const; - - /** - * \brief Evaluation of spline derivatives of up-to given order. - * - * The function returns - * \f{align*} - * \frac{d^i}{du^i}C(u) & = \sum_{i=0}^{n} \frac{d^i}{du^i} N_{i,p}(u)P_i - * \f} - * for i ranging between 0 and order. - * - * \param u Parameter \f$u \in [0;1]\f$ at which the spline derivative is evaluated. - * \param order The order up to which the derivatives are computed. - **/ - typename SplineTraits<Spline>::DerivativeType - derivatives(Scalar u, DenseIndex order) const; - - /** - * \copydoc Spline::derivatives - * Using the template version of this function is more efficieent since - * temporary objects are allocated on the stack whenever this is possible. - **/ - template <int DerivativeOrder> - typename SplineTraits<Spline,DerivativeOrder>::DerivativeType - derivatives(Scalar u, DenseIndex order = DerivativeOrder) const; - - /** - * \brief Computes the non-zero basis functions at the given site. - * - * Splines have local support and a point from their image is defined - * by exactly \f$p+1\f$ control points \f$P_i\f$ where \f$p\f$ is the - * spline degree. - * - * This function computes the \f$p+1\f$ non-zero basis function values - * for a given parameter value \f$u\f$. It returns - * \f{align*}{ - * N_{i,p}(u), \hdots, N_{i+p+1,p}(u) - * \f} - * - * \param u Parameter \f$u \in [0;1]\f$ at which the non-zero basis functions - * are computed. - **/ - typename SplineTraits<Spline>::BasisVectorType - basisFunctions(Scalar u) const; - - /** - * \brief Computes the non-zero spline basis function derivatives up to given order. - * - * The function computes - * \f{align*}{ - * \frac{d^i}{du^i} N_{i,p}(u), \hdots, \frac{d^i}{du^i} N_{i+p+1,p}(u) - * \f} - * with i ranging from 0 up to the specified order. - * - * \param u Parameter \f$u \in [0;1]\f$ at which the non-zero basis function - * derivatives are computed. - * \param order The order up to which the basis function derivatives are computes. - **/ - typename SplineTraits<Spline>::BasisDerivativeType - basisFunctionDerivatives(Scalar u, DenseIndex order) const; - - /** - * \copydoc Spline::basisFunctionDerivatives - * Using the template version of this function is more efficieent since - * temporary objects are allocated on the stack whenever this is possible. 
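 * A hypothetical call sketch with a compile-time derivative order (the spline
 * object and the evaluation site u are assumed to exist, with degree >= 2):
 * \code
 * SplineTraits<Spline3d, 2>::BasisDerivativeType ders =
 *     spline.basisFunctionDerivatives<2>(u);   // one row per order 0, 1, 2
 * \endcode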
- **/ - template <int DerivativeOrder> - typename SplineTraits<Spline,DerivativeOrder>::BasisDerivativeType - basisFunctionDerivatives(Scalar u, DenseIndex order = DerivativeOrder) const; - - /** - * \brief Returns the spline degree. - **/ - DenseIndex degree() const; - - /** - * \brief Returns the span within the knot vector in which u is falling. - * \param u The site for which the span is determined. - **/ - DenseIndex span(Scalar u) const; - - /** - * \brief Computes the spang within the provided knot vector in which u is falling. - **/ - static DenseIndex Span(typename SplineTraits<Spline>::Scalar u, DenseIndex degree, const typename SplineTraits<Spline>::KnotVectorType& knots); - - /** - * \brief Returns the spline's non-zero basis functions. - * - * The function computes and returns - * \f{align*}{ - * N_{i,p}(u), \hdots, N_{i+p+1,p}(u) - * \f} - * - * \param u The site at which the basis functions are computed. - * \param degree The degree of the underlying spline. - * \param knots The underlying spline's knot vector. - **/ - static BasisVectorType BasisFunctions(Scalar u, DenseIndex degree, const KnotVectorType& knots); - - /** - * \copydoc Spline::basisFunctionDerivatives - * \param degree The degree of the underlying spline - * \param knots The underlying spline's knot vector. - **/ - static BasisDerivativeType BasisFunctionDerivatives( - const Scalar u, const DenseIndex order, const DenseIndex degree, const KnotVectorType& knots); - - private: - KnotVectorType m_knots; /*!< Knot vector. */ - ControlPointVectorType m_ctrls; /*!< Control points. */ - - template <typename DerivativeType> - static void BasisFunctionDerivativesImpl( - const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, - const DenseIndex order, - const DenseIndex p, - const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& U, - DerivativeType& N_); - }; - - template <typename _Scalar, int _Dim, int _Degree> - DenseIndex Spline<_Scalar, _Dim, _Degree>::Span( - typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::Scalar u, - DenseIndex degree, - const typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::KnotVectorType& knots) - { - // Piegl & Tiller, "The NURBS Book", A2.1 (p. 
68) - if (u <= knots(0)) return degree; - const Scalar* pos = std::upper_bound(knots.data()+degree-1, knots.data()+knots.size()-degree-1, u); - return static_cast<DenseIndex>( std::distance(knots.data(), pos) - 1 ); - } - - template <typename _Scalar, int _Dim, int _Degree> - typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType - Spline<_Scalar, _Dim, _Degree>::BasisFunctions( - typename Spline<_Scalar, _Dim, _Degree>::Scalar u, - DenseIndex degree, - const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) - { - const DenseIndex p = degree; - const DenseIndex i = Spline::Span(u, degree, knots); - - const KnotVectorType& U = knots; - - BasisVectorType left(p+1); left(0) = Scalar(0); - BasisVectorType right(p+1); right(0) = Scalar(0); - - VectorBlock<BasisVectorType,Degree>(left,1,p) = u - VectorBlock<const KnotVectorType,Degree>(U,i+1-p,p).reverse(); - VectorBlock<BasisVectorType,Degree>(right,1,p) = VectorBlock<const KnotVectorType,Degree>(U,i+1,p) - u; - - BasisVectorType N(1,p+1); - N(0) = Scalar(1); - for (DenseIndex j=1; j<=p; ++j) - { - Scalar saved = Scalar(0); - for (DenseIndex r=0; r<j; r++) - { - const Scalar tmp = N(r)/(right(r+1)+left(j-r)); - N[r] = saved + right(r+1)*tmp; - saved = left(j-r)*tmp; - } - N(j) = saved; - } - return N; - } - - template <typename _Scalar, int _Dim, int _Degree> - DenseIndex Spline<_Scalar, _Dim, _Degree>::degree() const - { - if (_Degree == Dynamic) - return m_knots.size() - m_ctrls.cols() - 1; - else - return _Degree; - } - - template <typename _Scalar, int _Dim, int _Degree> - DenseIndex Spline<_Scalar, _Dim, _Degree>::span(Scalar u) const - { - return Spline::Span(u, degree(), knots()); - } - - template <typename _Scalar, int _Dim, int _Degree> - typename Spline<_Scalar, _Dim, _Degree>::PointType Spline<_Scalar, _Dim, _Degree>::operator()(Scalar u) const - { - enum { Order = SplineTraits<Spline>::OrderAtCompileTime }; - - const DenseIndex span = this->span(u); - const DenseIndex p = degree(); - const BasisVectorType basis_funcs = basisFunctions(u); - - const Replicate<BasisVectorType,Dimension,1> ctrl_weights(basis_funcs); - const Block<const ControlPointVectorType,Dimension,Order> ctrl_pts(ctrls(),0,span-p,Dimension,p+1); - return (ctrl_weights * ctrl_pts).rowwise().sum(); - } - - /* --------------------------------------------------------------------------------------------- */ - - template <typename SplineType, typename DerivativeType> - void derivativesImpl(const SplineType& spline, typename SplineType::Scalar u, DenseIndex order, DerivativeType& der) - { - enum { Dimension = SplineTraits<SplineType>::Dimension }; - enum { Order = SplineTraits<SplineType>::OrderAtCompileTime }; - enum { DerivativeOrder = DerivativeType::ColsAtCompileTime }; - - typedef typename SplineTraits<SplineType>::ControlPointVectorType ControlPointVectorType; - typedef typename SplineTraits<SplineType,DerivativeOrder>::BasisDerivativeType BasisDerivativeType; - typedef typename BasisDerivativeType::ConstRowXpr BasisDerivativeRowXpr; - - const DenseIndex p = spline.degree(); - const DenseIndex span = spline.span(u); - - const DenseIndex n = (std::min)(p, order); - - der.resize(Dimension,n+1); - - // Retrieve the basis function derivatives up to the desired order... - const BasisDerivativeType basis_func_ders = spline.template basisFunctionDerivatives<DerivativeOrder>(u, n+1); - - // ... and perform the linear combinations of the control points. 
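    // Each column k of 'der' below is the k-th curve derivative: row k of
    // basis_func_ders holds the k-th derivatives of the p+1 non-zero basis
    // functions at u, and it weights the same (p+1)-column block of control
    // points that defines the curve locally around the span of u.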
- for (DenseIndex der_order=0; der_order<n+1; ++der_order) - { - const Replicate<BasisDerivativeRowXpr,Dimension,1> ctrl_weights( basis_func_ders.row(der_order) ); - const Block<const ControlPointVectorType,Dimension,Order> ctrl_pts(spline.ctrls(),0,span-p,Dimension,p+1); - der.col(der_order) = (ctrl_weights * ctrl_pts).rowwise().sum(); - } - } - - template <typename _Scalar, int _Dim, int _Degree> - typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::DerivativeType - Spline<_Scalar, _Dim, _Degree>::derivatives(Scalar u, DenseIndex order) const - { - typename SplineTraits< Spline >::DerivativeType res; - derivativesImpl(*this, u, order, res); - return res; - } - - template <typename _Scalar, int _Dim, int _Degree> - template <int DerivativeOrder> - typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::DerivativeType - Spline<_Scalar, _Dim, _Degree>::derivatives(Scalar u, DenseIndex order) const - { - typename SplineTraits< Spline, DerivativeOrder >::DerivativeType res; - derivativesImpl(*this, u, order, res); - return res; - } - - template <typename _Scalar, int _Dim, int _Degree> - typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::BasisVectorType - Spline<_Scalar, _Dim, _Degree>::basisFunctions(Scalar u) const - { - return Spline::BasisFunctions(u, degree(), knots()); - } - - /* --------------------------------------------------------------------------------------------- */ - - - template <typename _Scalar, int _Dim, int _Degree> - template <typename DerivativeType> - void Spline<_Scalar, _Dim, _Degree>::BasisFunctionDerivativesImpl( - const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, - const DenseIndex order, - const DenseIndex p, - const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& U, - DerivativeType& N_) - { - typedef Spline<_Scalar, _Dim, _Degree> SplineType; - enum { Order = SplineTraits<SplineType>::OrderAtCompileTime }; - - const DenseIndex span = SplineType::Span(u, p, U); - - const DenseIndex n = (std::min)(p, order); - - N_.resize(n+1, p+1); - - BasisVectorType left = BasisVectorType::Zero(p+1); - BasisVectorType right = BasisVectorType::Zero(p+1); - - Matrix<Scalar,Order,Order> ndu(p+1,p+1); - - Scalar saved, temp; // FIXME These were double instead of Scalar. Was there a reason for that? 
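      // The 'ndu' table below follows The NURBS Book, A2.3: its upper triangle
      // accumulates the non-zero basis-function values of increasing degree,
      // while its lower triangle stores the knot differences
      // right[r+1] + left[j-r], which are reused further down when the
      // derivative coefficients a(s2, j) are formed.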
- - ndu(0,0) = 1.0; - - DenseIndex j; - for (j=1; j<=p; ++j) - { - left[j] = u-U[span+1-j]; - right[j] = U[span+j]-u; - saved = 0.0; - - for (DenseIndex r=0; r<j; ++r) - { - /* Lower triangle */ - ndu(j,r) = right[r+1]+left[j-r]; - temp = ndu(r,j-1)/ndu(j,r); - /* Upper triangle */ - ndu(r,j) = static_cast<Scalar>(saved+right[r+1] * temp); - saved = left[j-r] * temp; - } - - ndu(j,j) = static_cast<Scalar>(saved); - } - - for (j = p; j>=0; --j) - N_(0,j) = ndu(j,p); - - // Compute the derivatives - DerivativeType a(n+1,p+1); - DenseIndex r=0; - for (; r<=p; ++r) - { - DenseIndex s1,s2; - s1 = 0; s2 = 1; // alternate rows in array a - a(0,0) = 1.0; - - // Compute the k-th derivative - for (DenseIndex k=1; k<=static_cast<DenseIndex>(n); ++k) - { - Scalar d = 0.0; - DenseIndex rk,pk,j1,j2; - rk = r-k; pk = p-k; - - if (r>=k) - { - a(s2,0) = a(s1,0)/ndu(pk+1,rk); - d = a(s2,0)*ndu(rk,pk); - } - - if (rk>=-1) j1 = 1; - else j1 = -rk; - - if (r-1 <= pk) j2 = k-1; - else j2 = p-r; - - for (j=j1; j<=j2; ++j) - { - a(s2,j) = (a(s1,j)-a(s1,j-1))/ndu(pk+1,rk+j); - d += a(s2,j)*ndu(rk+j,pk); - } - - if (r<=pk) - { - a(s2,k) = -a(s1,k-1)/ndu(pk+1,r); - d += a(s2,k)*ndu(r,pk); - } - - N_(k,r) = static_cast<Scalar>(d); - j = s1; s1 = s2; s2 = j; // Switch rows - } - } - - /* Multiply through by the correct factors */ - /* (Eq. [2.9]) */ - r = p; - for (DenseIndex k=1; k<=static_cast<DenseIndex>(n); ++k) - { - for (j=p; j>=0; --j) N_(k,j) *= r; - r *= p-k; - } - } - - template <typename _Scalar, int _Dim, int _Degree> - typename SplineTraits< Spline<_Scalar, _Dim, _Degree> >::BasisDerivativeType - Spline<_Scalar, _Dim, _Degree>::basisFunctionDerivatives(Scalar u, DenseIndex order) const - { - typename SplineTraits<Spline<_Scalar, _Dim, _Degree> >::BasisDerivativeType der; - BasisFunctionDerivativesImpl(u, order, degree(), knots(), der); - return der; - } - - template <typename _Scalar, int _Dim, int _Degree> - template <int DerivativeOrder> - typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::BasisDerivativeType - Spline<_Scalar, _Dim, _Degree>::basisFunctionDerivatives(Scalar u, DenseIndex order) const - { - typename SplineTraits< Spline<_Scalar, _Dim, _Degree>, DerivativeOrder >::BasisDerivativeType der; - BasisFunctionDerivativesImpl(u, order, degree(), knots(), der); - return der; - } - - template <typename _Scalar, int _Dim, int _Degree> - typename SplineTraits<Spline<_Scalar, _Dim, _Degree> >::BasisDerivativeType - Spline<_Scalar, _Dim, _Degree>::BasisFunctionDerivatives( - const typename Spline<_Scalar, _Dim, _Degree>::Scalar u, - const DenseIndex order, - const DenseIndex degree, - const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) - { - typename SplineTraits<Spline>::BasisDerivativeType der; - BasisFunctionDerivativesImpl(u, order, degree, knots, der); - return der; - } -} - -#endif // EIGEN_SPLINE_H diff --git a/eigen/unsupported/Eigen/src/Splines/SplineFitting.h b/eigen/unsupported/Eigen/src/Splines/SplineFitting.h deleted file mode 100644 index c761a9b..0000000 --- a/eigen/unsupported/Eigen/src/Splines/SplineFitting.h +++ /dev/null @@ -1,430 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 20010-2011 Hauke Heibel <hauke.heibel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
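The Spline class removed above is evaluated through operator(), basisFunctions() and derivatives(). A minimal sketch, assuming the <unsupported/Eigen/Splines> module header and the same hypothetical clamped cubic as in the constructor example:

#include <unsupported/Eigen/Splines>
#include <iostream>

int main() {
  Eigen::Spline2d::KnotVectorType knots(8);
  knots << 0, 0, 0, 0, 1, 1, 1, 1;
  Eigen::Spline2d::ControlPointVectorType ctrls(2, 4);
  ctrls << 0, 1, 2, 3,
           0, 1, 0, 1;
  Eigen::Spline2d spline(knots, ctrls);

  const double u = 0.5;
  Eigen::Spline2d::PointType pt = spline(u);                         // curve point C(u)
  Eigen::Spline2d::BasisVectorType bf = spline.basisFunctions(u);    // the p+1 non-zero N_{i,p}(u)
  Eigen::SplineTraits<Eigen::Spline2d>::DerivativeType ders =
      spline.derivatives(u, 2);                                      // columns: C(u), C'(u), C''(u)
  std::cout << pt.transpose() << "\n" << bf << "\n" << ders << "\n";
}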
- -#ifndef EIGEN_SPLINE_FITTING_H -#define EIGEN_SPLINE_FITTING_H - -#include <algorithm> -#include <functional> -#include <numeric> -#include <vector> - -#include "SplineFwd.h" - -#include <Eigen/LU> -#include <Eigen/QR> - -namespace Eigen -{ - /** - * \brief Computes knot averages. - * \ingroup Splines_Module - * - * The knots are computed as - * \f{align*} - * u_0 & = \hdots = u_p = 0 \\ - * u_{m-p} & = \hdots = u_{m} = 1 \\ - * u_{j+p} & = \frac{1}{p}\sum_{i=j}^{j+p-1}\bar{u}_i \quad\quad j=1,\hdots,n-p - * \f} - * where \f$p\f$ is the degree and \f$m+1\f$ the number knots - * of the desired interpolating spline. - * - * \param[in] parameters The input parameters. During interpolation one for each data point. - * \param[in] degree The spline degree which is used during the interpolation. - * \param[out] knots The output knot vector. - * - * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data - **/ - template <typename KnotVectorType> - void KnotAveraging(const KnotVectorType& parameters, DenseIndex degree, KnotVectorType& knots) - { - knots.resize(parameters.size()+degree+1); - - for (DenseIndex j=1; j<parameters.size()-degree; ++j) - knots(j+degree) = parameters.segment(j,degree).mean(); - - knots.segment(0,degree+1) = KnotVectorType::Zero(degree+1); - knots.segment(knots.size()-degree-1,degree+1) = KnotVectorType::Ones(degree+1); - } - - /** - * \brief Computes knot averages when derivative constraints are present. - * Note that this is a technical interpretation of the referenced article - * since the algorithm contained therein is incorrect as written. - * \ingroup Splines_Module - * - * \param[in] parameters The parameters at which the interpolation B-Spline - * will intersect the given interpolation points. The parameters - * are assumed to be a non-decreasing sequence. - * \param[in] degree The degree of the interpolating B-Spline. This must be - * greater than zero. - * \param[in] derivativeIndices The indices corresponding to parameters at - * which there are derivative constraints. The indices are assumed - * to be a non-decreasing sequence. - * \param[out] knots The calculated knot vector. These will be returned as a - * non-decreasing sequence - * - * \sa Les A. Piegl, Khairan Rajab, Volha Smarodzinana. 2008. - * Curve interpolation with directional constraints for engineering design. - * Engineering with Computers - **/ - template <typename KnotVectorType, typename ParameterVectorType, typename IndexArray> - void KnotAveragingWithDerivatives(const ParameterVectorType& parameters, - const unsigned int degree, - const IndexArray& derivativeIndices, - KnotVectorType& knots) - { - typedef typename ParameterVectorType::Scalar Scalar; - - DenseIndex numParameters = parameters.size(); - DenseIndex numDerivatives = derivativeIndices.size(); - - if (numDerivatives < 1) - { - KnotAveraging(parameters, degree, knots); - return; - } - - DenseIndex startIndex; - DenseIndex endIndex; - - DenseIndex numInternalDerivatives = numDerivatives; - - if (derivativeIndices[0] == 0) - { - startIndex = 0; - --numInternalDerivatives; - } - else - { - startIndex = 1; - } - if (derivativeIndices[numDerivatives - 1] == numParameters - 1) - { - endIndex = numParameters - degree; - --numInternalDerivatives; - } - else - { - endIndex = numParameters - degree - 1; - } - - // There are (endIndex - startIndex + 1) knots obtained from the averaging - // and 2 for the first and last parameters. 
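    // For example, startIndex == 1 and endIndex == 4 gives four averaged knots
    // plus the two endpoint parameters, i.e. numAverageKnots == 4 - 1 + 3 == 6.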
- DenseIndex numAverageKnots = endIndex - startIndex + 3; - KnotVectorType averageKnots(numAverageKnots); - averageKnots[0] = parameters[0]; - - int newKnotIndex = 0; - for (DenseIndex i = startIndex; i <= endIndex; ++i) - averageKnots[++newKnotIndex] = parameters.segment(i, degree).mean(); - averageKnots[++newKnotIndex] = parameters[numParameters - 1]; - - newKnotIndex = -1; - - ParameterVectorType temporaryParameters(numParameters + 1); - KnotVectorType derivativeKnots(numInternalDerivatives); - for (DenseIndex i = 0; i < numAverageKnots - 1; ++i) - { - temporaryParameters[0] = averageKnots[i]; - ParameterVectorType parameterIndices(numParameters); - int temporaryParameterIndex = 1; - for (DenseIndex j = 0; j < numParameters; ++j) - { - Scalar parameter = parameters[j]; - if (parameter >= averageKnots[i] && parameter < averageKnots[i + 1]) - { - parameterIndices[temporaryParameterIndex] = j; - temporaryParameters[temporaryParameterIndex++] = parameter; - } - } - temporaryParameters[temporaryParameterIndex] = averageKnots[i + 1]; - - for (int j = 0; j <= temporaryParameterIndex - 2; ++j) - { - for (DenseIndex k = 0; k < derivativeIndices.size(); ++k) - { - if (parameterIndices[j + 1] == derivativeIndices[k] - && parameterIndices[j + 1] != 0 - && parameterIndices[j + 1] != numParameters - 1) - { - derivativeKnots[++newKnotIndex] = temporaryParameters.segment(j, 3).mean(); - break; - } - } - } - } - - KnotVectorType temporaryKnots(averageKnots.size() + derivativeKnots.size()); - - std::merge(averageKnots.data(), averageKnots.data() + averageKnots.size(), - derivativeKnots.data(), derivativeKnots.data() + derivativeKnots.size(), - temporaryKnots.data()); - - // Number of knots (one for each point and derivative) plus spline order. - DenseIndex numKnots = numParameters + numDerivatives + degree + 1; - knots.resize(numKnots); - - knots.head(degree).fill(temporaryKnots[0]); - knots.tail(degree).fill(temporaryKnots.template tail<1>()[0]); - knots.segment(degree, temporaryKnots.size()) = temporaryKnots; - } - - /** - * \brief Computes chord length parameters which are required for spline interpolation. - * \ingroup Splines_Module - * - * \param[in] pts The data points to which a spline should be fit. - * \param[out] chord_lengths The resulting chord lenggth vector. - * - * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data - **/ - template <typename PointArrayType, typename KnotVectorType> - void ChordLengths(const PointArrayType& pts, KnotVectorType& chord_lengths) - { - typedef typename KnotVectorType::Scalar Scalar; - - const DenseIndex n = pts.cols(); - - // 1. compute the column-wise norms - chord_lengths.resize(pts.cols()); - chord_lengths[0] = 0; - chord_lengths.rightCols(n-1) = (pts.array().leftCols(n-1) - pts.array().rightCols(n-1)).matrix().colwise().norm(); - - // 2. compute the partial sums - std::partial_sum(chord_lengths.data(), chord_lengths.data()+n, chord_lengths.data()); - - // 3. normalize the data - chord_lengths /= chord_lengths(n-1); - chord_lengths(n-1) = Scalar(1); - } - - /** - * \brief Spline fitting methods. - * \ingroup Splines_Module - **/ - template <typename SplineType> - struct SplineFitting - { - typedef typename SplineType::KnotVectorType KnotVectorType; - typedef typename SplineType::ParameterVectorType ParameterVectorType; - - /** - * \brief Fits an interpolating Spline to the given data points. - * - * \param pts The points for which an interpolating spline will be computed. 
- * \param degree The degree of the interpolating spline. - * - * \returns A spline interpolating the initially provided points. - **/ - template <typename PointArrayType> - static SplineType Interpolate(const PointArrayType& pts, DenseIndex degree); - - /** - * \brief Fits an interpolating Spline to the given data points. - * - * \param pts The points for which an interpolating spline will be computed. - * \param degree The degree of the interpolating spline. - * \param knot_parameters The knot parameters for the interpolation. - * - * \returns A spline interpolating the initially provided points. - **/ - template <typename PointArrayType> - static SplineType Interpolate(const PointArrayType& pts, DenseIndex degree, const KnotVectorType& knot_parameters); - - /** - * \brief Fits an interpolating spline to the given data points and - * derivatives. - * - * \param points The points for which an interpolating spline will be computed. - * \param derivatives The desired derivatives of the interpolating spline at interpolation - * points. - * \param derivativeIndices An array indicating which point each derivative belongs to. This - * must be the same size as @a derivatives. - * \param degree The degree of the interpolating spline. - * - * \returns A spline interpolating @a points with @a derivatives at those points. - * - * \sa Les A. Piegl, Khairan Rajab, Volha Smarodzinana. 2008. - * Curve interpolation with directional constraints for engineering design. - * Engineering with Computers - **/ - template <typename PointArrayType, typename IndexArray> - static SplineType InterpolateWithDerivatives(const PointArrayType& points, - const PointArrayType& derivatives, - const IndexArray& derivativeIndices, - const unsigned int degree); - - /** - * \brief Fits an interpolating spline to the given data points and derivatives. - * - * \param points The points for which an interpolating spline will be computed. - * \param derivatives The desired derivatives of the interpolating spline at interpolation points. - * \param derivativeIndices An array indicating which point each derivative belongs to. This - * must be the same size as @a derivatives. - * \param degree The degree of the interpolating spline. - * \param parameters The parameters corresponding to the interpolation points. - * - * \returns A spline interpolating @a points with @a derivatives at those points. - * - * \sa Les A. Piegl, Khairan Rajab, Volha Smarodzinana. 2008. - * Curve interpolation with directional constraints for engineering design. 
- * Engineering with Computers - */ - template <typename PointArrayType, typename IndexArray> - static SplineType InterpolateWithDerivatives(const PointArrayType& points, - const PointArrayType& derivatives, - const IndexArray& derivativeIndices, - const unsigned int degree, - const ParameterVectorType& parameters); - }; - - template <typename SplineType> - template <typename PointArrayType> - SplineType SplineFitting<SplineType>::Interpolate(const PointArrayType& pts, DenseIndex degree, const KnotVectorType& knot_parameters) - { - typedef typename SplineType::KnotVectorType::Scalar Scalar; - typedef typename SplineType::ControlPointVectorType ControlPointVectorType; - - typedef Matrix<Scalar,Dynamic,Dynamic> MatrixType; - - KnotVectorType knots; - KnotAveraging(knot_parameters, degree, knots); - - DenseIndex n = pts.cols(); - MatrixType A = MatrixType::Zero(n,n); - for (DenseIndex i=1; i<n-1; ++i) - { - const DenseIndex span = SplineType::Span(knot_parameters[i], degree, knots); - - // The segment call should somehow be told the spline order at compile time. - A.row(i).segment(span-degree, degree+1) = SplineType::BasisFunctions(knot_parameters[i], degree, knots); - } - A(0,0) = 1.0; - A(n-1,n-1) = 1.0; - - HouseholderQR<MatrixType> qr(A); - - // Here, we are creating a temporary due to an Eigen issue. - ControlPointVectorType ctrls = qr.solve(MatrixType(pts.transpose())).transpose(); - - return SplineType(knots, ctrls); - } - - template <typename SplineType> - template <typename PointArrayType> - SplineType SplineFitting<SplineType>::Interpolate(const PointArrayType& pts, DenseIndex degree) - { - KnotVectorType chord_lengths; // knot parameters - ChordLengths(pts, chord_lengths); - return Interpolate(pts, degree, chord_lengths); - } - - template <typename SplineType> - template <typename PointArrayType, typename IndexArray> - SplineType - SplineFitting<SplineType>::InterpolateWithDerivatives(const PointArrayType& points, - const PointArrayType& derivatives, - const IndexArray& derivativeIndices, - const unsigned int degree, - const ParameterVectorType& parameters) - { - typedef typename SplineType::KnotVectorType::Scalar Scalar; - typedef typename SplineType::ControlPointVectorType ControlPointVectorType; - - typedef Matrix<Scalar, Dynamic, Dynamic> MatrixType; - - const DenseIndex n = points.cols() + derivatives.cols(); - - KnotVectorType knots; - - KnotAveragingWithDerivatives(parameters, degree, derivativeIndices, knots); - - // fill matrix - MatrixType A = MatrixType::Zero(n, n); - - // Use these dimensions for quicker populating, then transpose for solving. - MatrixType b(points.rows(), n); - - DenseIndex startRow; - DenseIndex derivativeStart; - - // End derivatives. 
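    // A derivative prescribed at an endpoint constrains the difference of the
    // two outermost control points: for a clamped knot vector,
    //   C'(u_0) = degree / (knots(degree+1) - knots(0)) * (P_1 - P_0),
    // hence the (-1, 1) stencil rows in A below and a right-hand side scaled
    // by y = (knots(degree+1) - knots(0)) / degree.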
- if (derivativeIndices[0] == 0) - { - A.template block<1, 2>(1, 0) << -1, 1; - - Scalar y = (knots(degree + 1) - knots(0)) / degree; - b.col(1) = y*derivatives.col(0); - - startRow = 2; - derivativeStart = 1; - } - else - { - startRow = 1; - derivativeStart = 0; - } - if (derivativeIndices[derivatives.cols() - 1] == points.cols() - 1) - { - A.template block<1, 2>(n - 2, n - 2) << -1, 1; - - Scalar y = (knots(knots.size() - 1) - knots(knots.size() - (degree + 2))) / degree; - b.col(b.cols() - 2) = y*derivatives.col(derivatives.cols() - 1); - } - - DenseIndex row = startRow; - DenseIndex derivativeIndex = derivativeStart; - for (DenseIndex i = 1; i < parameters.size() - 1; ++i) - { - const DenseIndex span = SplineType::Span(parameters[i], degree, knots); - - if (derivativeIndices[derivativeIndex] == i) - { - A.block(row, span - degree, 2, degree + 1) - = SplineType::BasisFunctionDerivatives(parameters[i], 1, degree, knots); - - b.col(row++) = points.col(i); - b.col(row++) = derivatives.col(derivativeIndex++); - } - else - { - A.row(row++).segment(span - degree, degree + 1) - = SplineType::BasisFunctions(parameters[i], degree, knots); - } - } - b.col(0) = points.col(0); - b.col(b.cols() - 1) = points.col(points.cols() - 1); - A(0,0) = 1; - A(n - 1, n - 1) = 1; - - // Solve - FullPivLU<MatrixType> lu(A); - ControlPointVectorType controlPoints = lu.solve(MatrixType(b.transpose())).transpose(); - - SplineType spline(knots, controlPoints); - - return spline; - } - - template <typename SplineType> - template <typename PointArrayType, typename IndexArray> - SplineType - SplineFitting<SplineType>::InterpolateWithDerivatives(const PointArrayType& points, - const PointArrayType& derivatives, - const IndexArray& derivativeIndices, - const unsigned int degree) - { - ParameterVectorType parameters; - ChordLengths(points, parameters); - return InterpolateWithDerivatives(points, derivatives, derivativeIndices, degree, parameters); - } -} - -#endif // EIGEN_SPLINE_FITTING_H diff --git a/eigen/unsupported/Eigen/src/Splines/SplineFwd.h b/eigen/unsupported/Eigen/src/Splines/SplineFwd.h deleted file mode 100644 index 0a95fbf..0000000 --- a/eigen/unsupported/Eigen/src/Splines/SplineFwd.h +++ /dev/null @@ -1,93 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 20010-2011 Hauke Heibel <hauke.heibel@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPLINES_FWD_H -#define EIGEN_SPLINES_FWD_H - -#include <Eigen/Core> - -namespace Eigen -{ - template <typename Scalar, int Dim, int Degree = Dynamic> class Spline; - - template < typename SplineType, int DerivativeOrder = Dynamic > struct SplineTraits {}; - - /** - * \ingroup Splines_Module - * \brief Compile-time attributes of the Spline class for Dynamic degree. - **/ - template <typename _Scalar, int _Dim, int _Degree> - struct SplineTraits< Spline<_Scalar, _Dim, _Degree>, Dynamic > - { - typedef _Scalar Scalar; /*!< The spline curve's scalar type. */ - enum { Dimension = _Dim /*!< The spline curve's dimension. */ }; - enum { Degree = _Degree /*!< The spline curve's degree. */ }; - - enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. 
*/ }; - enum { NumOfDerivativesAtCompileTime = OrderAtCompileTime /*!< The number of derivatives defined for the current spline. */ }; - - enum { DerivativeMemoryLayout = Dimension==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ }; - - /** \brief The data type used to store non-zero basis functions. */ - typedef Array<Scalar,1,OrderAtCompileTime> BasisVectorType; - - /** \brief The data type used to store the values of the basis function derivatives. */ - typedef Array<Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType; - - /** \brief The data type used to store the spline's derivative values. */ - typedef Array<Scalar,Dimension,Dynamic,DerivativeMemoryLayout,Dimension,NumOfDerivativesAtCompileTime> DerivativeType; - - /** \brief The point type the spline is representing. */ - typedef Array<Scalar,Dimension,1> PointType; - - /** \brief The data type used to store knot vectors. */ - typedef Array<Scalar,1,Dynamic> KnotVectorType; - - /** \brief The data type used to store parameter vectors. */ - typedef Array<Scalar,1,Dynamic> ParameterVectorType; - - /** \brief The data type representing the spline's control points. */ - typedef Array<Scalar,Dimension,Dynamic> ControlPointVectorType; - }; - - /** - * \ingroup Splines_Module - * \brief Compile-time attributes of the Spline class for fixed degree. - * - * The traits class inherits all attributes from the SplineTraits of Dynamic degree. - **/ - template < typename _Scalar, int _Dim, int _Degree, int _DerivativeOrder > - struct SplineTraits< Spline<_Scalar, _Dim, _Degree>, _DerivativeOrder > : public SplineTraits< Spline<_Scalar, _Dim, _Degree> > - { - enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ }; - enum { NumOfDerivativesAtCompileTime = _DerivativeOrder==Dynamic ? Dynamic : _DerivativeOrder+1 /*!< The number of derivatives defined for the current spline. */ }; - - enum { DerivativeMemoryLayout = _Dim==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ }; - - /** \brief The data type used to store the values of the basis function derivatives. */ - typedef Array<_Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType; - - /** \brief The data type used to store the spline's derivative values. */ - typedef Array<_Scalar,_Dim,Dynamic,DerivativeMemoryLayout,_Dim,NumOfDerivativesAtCompileTime> DerivativeType; - }; - - /** \brief 2D float B-spline with dynamic degree. */ - typedef Spline<float,2> Spline2f; - - /** \brief 3D float B-spline with dynamic degree. */ - typedef Spline<float,3> Spline3f; - - /** \brief 2D double B-spline with dynamic degree. */ - typedef Spline<double,2> Spline2d; - - /** \brief 3D double B-spline with dynamic degree. */ - typedef Spline<double,3> Spline3d; -} - -#endif // EIGEN_SPLINES_FWD_H |
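Taken together with the SplineFitting helpers removed above, the dynamic-degree typedefs are typically used for interpolation. A minimal sketch with made-up data points (chord-length parameterization and knot averaging happen inside Interpolate()):

#include <unsupported/Eigen/Splines>
#include <iostream>

int main() {
  // Five 2D data points, one per column, interpolated by a cubic B-spline.
  Eigen::MatrixXd pts(2, 5);
  pts << 0.0, 1.0, 2.0, 3.0, 4.0,
         0.0, 1.0, 0.0, 1.0, 0.0;

  const Eigen::Spline2d spline =
      Eigen::SplineFitting<Eigen::Spline2d>::Interpolate(pts, 3);

  // The fitted curve passes through the data at the chord-length parameters.
  Eigen::Spline2d::ParameterVectorType params;
  Eigen::ChordLengths(pts, params);
  std::cout << (spline(params(0)) - pts.col(0).array()).matrix().norm() << "\n";
}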