From 88534ba623421c956d8ffcda2d27f41d704d15ef Mon Sep 17 00:00:00 2001
From: Stanislaw Halik
Date: Tue, 3 Jul 2018 07:37:12 +0200
Subject: update eigen

---
 eigen/.hgignore | 4 -
 eigen/.hgtags | 4 +
 eigen/CMakeLists.txt | 23 +-
 eigen/CTestConfig.cmake | 8 +-
 eigen/Eigen/Core | 30 +-
 eigen/Eigen/Geometry | 1 +
 eigen/Eigen/src/Cholesky/LDLT.h | 1 +
 eigen/Eigen/src/Cholesky/LLT.h | 1 +
 eigen/Eigen/src/CholmodSupport/CholmodSupport.h | 71 +-
 eigen/Eigen/src/Core/ArithmeticSequence.h | 350 -----
 eigen/Eigen/src/Core/Array.h | 8 +-
 eigen/Eigen/src/Core/ArrayBase.h | 2 +-
 eigen/Eigen/src/Core/ArrayWrapper.h | 6 +-
 eigen/Eigen/src/Core/Assign.h | 2 +-
 eigen/Eigen/src/Core/BooleanRedux.h | 44 +-
 eigen/Eigen/src/Core/CommaInitializer.h | 4 +-
 eigen/Eigen/src/Core/CoreEvaluators.h | 203 +--
 eigen/Eigen/src/Core/CoreIterators.h | 5 -
 eigen/Eigen/src/Core/CwiseBinaryOp.h | 5 +-
 eigen/Eigen/src/Core/CwiseNullaryOp.h | 4 +-
 eigen/Eigen/src/Core/DenseBase.h | 4 -
 eigen/Eigen/src/Core/Diagonal.h | 10 +-
 eigen/Eigen/src/Core/DiagonalMatrix.h | 4 +-
 eigen/Eigen/src/Core/DiagonalProduct.h | 2 +-
 eigen/Eigen/src/Core/Dot.h | 16 +-
 eigen/Eigen/src/Core/EigenBase.h | 1 +
 eigen/Eigen/src/Core/Fuzzy.h | 6 +-
 eigen/Eigen/src/Core/GeneralProduct.h | 2 +-
 eigen/Eigen/src/Core/GenericPacketMath.h | 5 -
 eigen/Eigen/src/Core/GlobalFunctions.h | 1 -
 eigen/Eigen/src/Core/IndexedView.h | 207 ---
 eigen/Eigen/src/Core/MathFunctions.h | 274 +---
 eigen/Eigen/src/Core/MathFunctionsImpl.h | 7 +-
 eigen/Eigen/src/Core/MatrixBase.h | 2 +-
 eigen/Eigen/src/Core/NestByValue.h | 10 +-
 eigen/Eigen/src/Core/NumTraits.h | 4 +-
 eigen/Eigen/src/Core/ProductEvaluators.h | 6 +
 eigen/Eigen/src/Core/Random.h | 2 +-
 eigen/Eigen/src/Core/Redux.h | 14 +-
 eigen/Eigen/src/Core/Ref.h | 2 -
 eigen/Eigen/src/Core/Replicate.h | 4 +-
 eigen/Eigen/src/Core/ReturnByValue.h | 2 +-
 eigen/Eigen/src/Core/Reverse.h | 6 +-
 eigen/Eigen/src/Core/SelfAdjointView.h | 4 +-
 eigen/Eigen/src/Core/Solve.h | 4 +-
 eigen/Eigen/src/Core/SolveTriangular.h | 2 +-
 eigen/Eigen/src/Core/Transpose.h | 10 +-
 eigen/Eigen/src/Core/TriangularMatrix.h | 15 +-
 eigen/Eigen/src/Core/VectorwiseOp.h | 4 +-
 eigen/Eigen/src/Core/arch/AVX/PacketMath.h | 24 +-
 eigen/Eigen/src/Core/arch/AVX512/PacketMath.h | 200 +--
 eigen/Eigen/src/Core/arch/CUDA/Half.h | 77 +-
 eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h | 12 -
 eigen/Eigen/src/Core/arch/CUDA/PacketMath.h | 4 +-
 eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 9 -
 eigen/Eigen/src/Core/arch/NEON/PacketMath.h | 2 +-
 eigen/Eigen/src/Core/arch/SSE/PacketMath.h | 66 +-
 eigen/Eigen/src/Core/functors/NullaryFunctors.h | 11 +-
 eigen/Eigen/src/Core/functors/UnaryFunctors.h | 44 +-
 .../Core/products/GeneralMatrixMatrixTriangular.h | 19 +-
 .../products/GeneralMatrixMatrixTriangular_BLAS.h | 2 +-
 .../Eigen/src/Core/products/GeneralMatrixVector.h | 680 ++++++---
 eigen/Eigen/src/Core/products/SelfadjointProduct.h | 2 +-
 .../src/Core/products/SelfadjointRank2Update.h | 2 +-
 eigen/Eigen/src/Core/util/BlasUtil.h | 5 -
 eigen/Eigen/src/Core/util/Constants.h | 4 -
 eigen/Eigen/src/Core/util/DisableStupidWarnings.h | 6 +-
 eigen/Eigen/src/Core/util/ForwardDeclarations.h | 1 -
 eigen/Eigen/src/Core/util/IndexedViewHelper.h | 187 ---
 eigen/Eigen/src/Core/util/IntegralConstant.h | 270 ----
 eigen/Eigen/src/Core/util/Macros.h | 44 +-
 eigen/Eigen/src/Core/util/Memory.h | 26 +-
 eigen/Eigen/src/Core/util/Meta.h | 86 +-
 eigen/Eigen/src/Core/util/SymbolicIndex.h | 300 ----
 eigen/Eigen/src/Core/util/XprHelper.h | 4 +-
 .../Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h | 4 +-
 eigen/Eigen/src/Geometry/AlignedBox.h | 2 +-
 eigen/Eigen/src/Geometry/ParametrizedLine.h | 39 +-
 eigen/Eigen/src/Geometry/Quaternion.h | 10 +-
 eigen/Eigen/src/Geometry/Transform.h | 14 +-
 eigen/Eigen/src/Geometry/arch/Geometry_SSE.h | 60 +-
 .../IterativeLinearSolvers/BasicPreconditioners.h | 27 +-
 eigen/Eigen/src/Jacobi/Jacobi.h | 42 +-
 eigen/Eigen/src/LU/FullPivLU.h | 2 +
 eigen/Eigen/src/QR/ColPivHouseholderQR.h | 13 +-
 .../Eigen/src/QR/CompleteOrthogonalDecomposition.h | 2 +-
 eigen/Eigen/src/QR/FullPivHouseholderQR.h | 9 +-
 eigen/Eigen/src/QR/HouseholderQR.h | 9 +-
 eigen/Eigen/src/SVD/BDCSVD.h | 81 +-
 eigen/Eigen/src/SVD/SVDBase.h | 1 +
 eigen/Eigen/src/SVD/UpperBidiagonalization.h | 4 +-
 eigen/Eigen/src/SparseCore/SparseCompressedBase.h | 16 -
 eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 12 +-
 eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h | 3 +-
 eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h | 6 +-
 eigen/Eigen/src/misc/lapacke.h | 9 +-
 eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h | 17 +-
 eigen/Eigen/src/plugins/BlockMethods.h | 576 ++------
 eigen/Eigen/src/plugins/IndexedViewMethods.h | 267 ----
 eigen/bench/btl/actions/basic_actions.hh | 2 +-
 eigen/bench/btl/libs/BLAS/blas_interface_impl.hh | 6 +-
 eigen/bench/btl/libs/BLAS/main.cpp | 2 +-
 eigen/bench/btl/libs/STL/STL_interface.hh | 24 +-
 eigen/bench/btl/libs/blaze/blaze_interface.hh | 17 +-
 eigen/bench/btl/libs/blaze/main.cpp | 6 +-
 eigen/bench/btl/libs/eigen3/eigen3_interface.hh | 8 +-
 eigen/bench/btl/libs/eigen3/main_matmat.cpp | 2 +-
 eigen/bench/perf_monitoring/changesets.txt | 71 -
 eigen/bench/perf_monitoring/gemm.cpp | 12 -
 eigen/bench/perf_monitoring/gemm/changesets.txt | 61 +
 eigen/bench/perf_monitoring/gemm/gemm.cpp | 67 +
 eigen/bench/perf_monitoring/gemm/gemm_settings.txt | 15 +
 eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp | 98 ++
 .../perf_monitoring/gemm/lazy_gemm_settings.txt | 15 +
 eigen/bench/perf_monitoring/gemm/make_plot.sh | 38 +
 eigen/bench/perf_monitoring/gemm/run.sh | 156 ++
 eigen/bench/perf_monitoring/gemm_common.h | 67 -
 eigen/bench/perf_monitoring/gemm_settings.txt | 15 -
 .../bench/perf_monitoring/gemm_square_settings.txt | 11 -
 eigen/bench/perf_monitoring/gemv.cpp | 12 -
 eigen/bench/perf_monitoring/gemv_common.h | 69 -
 eigen/bench/perf_monitoring/gemv_settings.txt | 11 -
 .../bench/perf_monitoring/gemv_square_settings.txt | 13 -
 eigen/bench/perf_monitoring/gemvt.cpp | 12 -
 eigen/bench/perf_monitoring/lazy_gemm.cpp | 101 --
 eigen/bench/perf_monitoring/lazy_gemm_settings.txt | 15 -
 eigen/bench/perf_monitoring/llt.cpp | 15 -
 eigen/bench/perf_monitoring/make_plot.sh | 98 --
 .../perf_monitoring/resources/chart_footer.html | 37 -
 .../perf_monitoring/resources/chart_header.html | 46 -
 eigen/bench/perf_monitoring/resources/footer.html | 3 -
 eigen/bench/perf_monitoring/resources/header.html | 42 -
 eigen/bench/perf_monitoring/resources/s1.js | 1 -
 eigen/bench/perf_monitoring/resources/s2.js | 1 -
 eigen/bench/perf_monitoring/run.sh | 172 ---
 eigen/bench/perf_monitoring/runall.sh | 63 -
 eigen/bench/perf_monitoring/trmv_lo.cpp | 12 -
 eigen/bench/perf_monitoring/trmv_lot.cpp | 12 -
 eigen/bench/perf_monitoring/trmv_up.cpp | 12 -
 eigen/bench/perf_monitoring/trmv_upt.cpp | 12 -
 eigen/bench/spbench/CMakeLists.txt | 27 +-
 eigen/bench/tensors/tensor_benchmarks_sycl.cc | 23 +-
 eigen/blas/CMakeLists.txt | 10 +-
 eigen/cmake/FindBLAS.cmake | 1499 ++++++++++++++++----
 eigen/cmake/FindBLASEXT.cmake | 380 +++++
 eigen/cmake/FindComputeCpp.cmake | 2 +-
 eigen/cmake/FindHWLOC.cmake | 331 +++++
 eigen/cmake/FindMetis.cmake | 297 +++-
 eigen/cmake/FindPTSCOTCH.cmake | 423 ++++++
 eigen/cmake/FindPastix.cmake | 713 +++++++++-
 eigen/cmake/FindScotch.cmake | 379 ++++-
 eigen/cmake/FindXsmm.cmake | 25 -
 eigen/debug/gdb/printers.py | 168 +--
 eigen/doc/AsciiQuickReference.txt | 2 +-
 eigen/doc/Doxyfile.in | 5 +-
 eigen/doc/FixedSizeVectorizable.dox | 10 +-
 eigen/doc/PassingByValue.dox | 8 +-
 eigen/doc/TopicCMakeGuide.dox | 10 +-
 eigen/doc/UnalignedArrayAssert.dox | 9 -
 eigen/doc/eigendoxy.css | 5 -
 eigen/scripts/eigen_monitor_perf.sh | 25 -
 eigen/test/CMakeLists.txt | 32 +-
 eigen/test/array.cpp | 25 +-
 eigen/test/array_for_matrix.cpp | 22 +-
 eigen/test/basicstuff.cpp | 16 -
 eigen/test/block.cpp | 19 +-
 eigen/test/cholmod_support.cpp | 42 +-
 eigen/test/geo_alignedbox.cpp | 11 +-
 eigen/test/geo_parametrizedline.cpp | 27 -
 eigen/test/half_float.cpp | 29 +-
 eigen/test/indexed_view.cpp | 378 -----
 eigen/test/lscg.cpp | 8 +
 eigen/test/main.h | 11 +
 eigen/test/mixingtypes.cpp | 2 +-
 eigen/test/numext.cpp | 53 +
 eigen/test/packetmath.cpp | 1 -
 eigen/test/product_mmtr.cpp | 11 +-
 eigen/test/product_notemporary.cpp | 1 +
 eigen/test/sparse_product.cpp | 4 +
 eigen/test/symbolic_index.cpp | 104 --
 eigen/unsupported/CMakeLists.txt | 10 +-
 eigen/unsupported/Eigen/CXX11/Tensor | 9 +-
 eigen/unsupported/Eigen/CXX11/ThreadPool | 13 -
 eigen/unsupported/Eigen/CXX11/src/Tensor/README.md | 8 +-
 .../Eigen/CXX11/src/Tensor/TensorBase.h | 6 -
 .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 2 +-
 .../Eigen/CXX11/src/Tensor/TensorChipping.h | 30 +-
 .../Eigen/CXX11/src/Tensor/TensorConcatenation.h | 6 -
 .../Eigen/CXX11/src/Tensor/TensorContraction.h | 287 +---
 .../CXX11/src/Tensor/TensorContractionBlocking.h | 134 --
 .../Eigen/CXX11/src/Tensor/TensorContractionCuda.h | 101 +-
 .../CXX11/src/Tensor/TensorContractionMapper.h | 76 +-
 .../Eigen/CXX11/src/Tensor/TensorContractionSycl.h | 400 ------
 .../CXX11/src/Tensor/TensorContractionThreadPool.h | 208 +--
 .../Eigen/CXX11/src/Tensor/TensorConversion.h | 3 -
 .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 +-
 .../Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h | 476 -------
 .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 7 +-
 .../Eigen/CXX11/src/Tensor/TensorDeviceDefault.h | 4 +-
 .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 413 +----
 .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 13 +-
 .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 12 +-
 .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 5 +-
 .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 11 +-
 .../unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 4 +-
 .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 41 +-
 .../CXX11/src/Tensor/TensorForwardDeclarations.h | 14 +-
 .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 4 +-
 .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 12 +-
 .../Eigen/CXX11/src/Tensor/TensorMacros.h | 8 -
 .../Eigen/CXX11/src/Tensor/TensorMeta.h | 5 +-
 .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 53 +-
 .../Eigen/CXX11/src/Tensor/TensorPadding.h | 7 -
 .../Eigen/CXX11/src/Tensor/TensorReduction.h | 30 +-
 .../Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 1 +
 .../Eigen/CXX11/src/Tensor/TensorReductionSycl.h | 179 ++-
 .../Eigen/CXX11/src/Tensor/TensorReverse.h | 5 -
 .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 10 +-
 .../Eigen/CXX11/src/Tensor/TensorStorage.h | 8 +-
 .../Eigen/CXX11/src/Tensor/TensorStriding.h | 16 +-
 .../Eigen/CXX11/src/Tensor/TensorSycl.h | 12 -
 .../Tensor/TensorSyclConvertToDeviceExpression.h | 54 +-
 .../CXX11/src/Tensor/TensorSyclExprConstructor.h | 188 +--
 .../CXX11/src/Tensor/TensorSyclExtractAccessor.h | 265 ++--
 .../CXX11/src/Tensor/TensorSyclExtractFunctors.h | 322 ++--
 .../Eigen/CXX11/src/Tensor/TensorSyclFunctors.h | 245 ----
 .../Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h | 138 +-
 .../CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h | 57 +-
 .../Eigen/CXX11/src/Tensor/TensorSyclRun.h | 77 +-
 .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 2 -
 .../Eigen/CXX11/src/Tensor/TensorTraits.h | 6 -
 .../Eigen/CXX11/src/Tensor/TensorUInt128.h | 1 -
 .../CXX11/src/ThreadPool/NonBlockingThreadPool.h | 142 +-
 .../Eigen/CXX11/src/ThreadPool/RunQueue.h | 7 -
 .../Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h | 8 -
 .../Eigen/CXX11/src/ThreadPool/ThreadCancel.h | 23 -
 .../Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h | 2 -
 .../CXX11/src/ThreadPool/ThreadPoolInterface.h | 6 -
 eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h | 6 +-
 .../Eigen/CXX11/src/util/EmulateArray.h | 13 +-
 .../Eigen/src/AutoDiff/AutoDiffScalar.h | 7 +
 .../Eigen/src/EulerAngles/EulerAngles.h | 257 ++--
 .../Eigen/src/EulerAngles/EulerSystem.h | 184 +--
 .../Eigen/src/MatrixFunctions/MatrixFunction.h | 11 +-
 .../Eigen/src/MatrixFunctions/MatrixLogarithm.h | 2 +-
 .../unsupported/Eigen/src/Polynomials/Companion.h | 50 +-
 .../Eigen/src/Polynomials/PolynomialSolver.h | 18 +-
 eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h | 89 +-
 .../src/SpecialFunctions/SpecialFunctionsImpl.h | 8 +-
 eigen/unsupported/doc/examples/EulerAngles.cpp | 4 +-
 eigen/unsupported/test/CMakeLists.txt | 21 -
 eigen/unsupported/test/EulerAngles.cpp | 296 ++--
 eigen/unsupported/test/autodiff_scalar.cpp | 15 +
 .../test/cxx11_non_blocking_thread_pool.cpp | 24 +-
 .../test/cxx11_tensor_broadcast_sycl.cpp | 114 +-
 .../test/cxx11_tensor_builtins_sycl.cpp | 267 ----
 eigen/unsupported/test/cxx11_tensor_chipping.cpp | 8 +-
 .../test/cxx11_tensor_chipping_sycl.cpp | 622 --------
 .../test/cxx11_tensor_concatenation_sycl.cpp | 180 ---
 .../test/cxx11_tensor_contract_sycl.cpp | 290 ----
 .../test/cxx11_tensor_convolution_sycl.cpp | 469 ------
 .../unsupported/test/cxx11_tensor_device_sycl.cpp | 60 +-
 eigen/unsupported/test/cxx11_tensor_expr.cpp | 46 -
 eigen/unsupported/test/cxx11_tensor_fixed_size.cpp | 2 +-
 .../test/cxx11_tensor_forced_eval_sycl.cpp | 54 +-
 .../test/cxx11_tensor_morphing_sycl.cpp | 248 ----
 .../unsupported/test/cxx11_tensor_notification.cpp | 17 +-
 .../test/cxx11_tensor_of_float16_cuda.cu | 6 -
 .../unsupported/test/cxx11_tensor_padding_sycl.cpp | 157 --
 .../test/cxx11_tensor_reduction_sycl.cpp | 167 +--
 .../unsupported/test/cxx11_tensor_reverse_sycl.cpp | 221 ---
 .../test/cxx11_tensor_shuffling_sycl.cpp | 119 --
 .../test/cxx11_tensor_striding_sycl.cpp | 203 ---
 eigen/unsupported/test/cxx11_tensor_sycl.cpp | 219 +--
 eigen/unsupported/test/polynomialsolver.cpp | 34 +-
 eigen/unsupported/test/sparse_extra.cpp | 23 -
 276 files changed, 7075 insertions(+), 12961 deletions(-)
 delete mode 100644 eigen/Eigen/src/Core/ArithmeticSequence.h
 delete mode 100644 eigen/Eigen/src/Core/IndexedView.h
 delete mode 100644 eigen/Eigen/src/Core/util/IndexedViewHelper.h
 delete mode 100644 eigen/Eigen/src/Core/util/IntegralConstant.h
 delete mode 100644 eigen/Eigen/src/Core/util/SymbolicIndex.h
 delete mode 100644 eigen/Eigen/src/plugins/IndexedViewMethods.h
 delete mode 100644 eigen/bench/perf_monitoring/changesets.txt
 delete mode 100644 eigen/bench/perf_monitoring/gemm.cpp
 create mode 100644 eigen/bench/perf_monitoring/gemm/changesets.txt
 create mode 100644 eigen/bench/perf_monitoring/gemm/gemm.cpp
 create mode 100644 eigen/bench/perf_monitoring/gemm/gemm_settings.txt
 create mode 100644 eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp
 create mode 100644 eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt
 create mode 100644 eigen/bench/perf_monitoring/gemm/make_plot.sh
 create mode 100644 eigen/bench/perf_monitoring/gemm/run.sh
 delete mode 100644 eigen/bench/perf_monitoring/gemm_common.h
 delete mode 100644 eigen/bench/perf_monitoring/gemm_settings.txt
 delete mode 100644 eigen/bench/perf_monitoring/gemm_square_settings.txt
 delete mode 100644 eigen/bench/perf_monitoring/gemv.cpp
 delete mode 100644 eigen/bench/perf_monitoring/gemv_common.h
 delete mode 100644 eigen/bench/perf_monitoring/gemv_settings.txt
 delete mode 100644 eigen/bench/perf_monitoring/gemv_square_settings.txt
 delete mode 100644 eigen/bench/perf_monitoring/gemvt.cpp
 delete mode 100644 eigen/bench/perf_monitoring/lazy_gemm.cpp
 delete mode 100644 eigen/bench/perf_monitoring/lazy_gemm_settings.txt
 delete mode 100644 eigen/bench/perf_monitoring/llt.cpp
 delete mode 100644 eigen/bench/perf_monitoring/make_plot.sh
 delete mode 100644 eigen/bench/perf_monitoring/resources/chart_footer.html
 delete mode 100644 eigen/bench/perf_monitoring/resources/chart_header.html
 delete mode 100644 eigen/bench/perf_monitoring/resources/footer.html
 delete mode 100644 eigen/bench/perf_monitoring/resources/header.html
 delete mode 100644 eigen/bench/perf_monitoring/resources/s1.js
 delete mode 100644 eigen/bench/perf_monitoring/resources/s2.js
 delete mode 100644 eigen/bench/perf_monitoring/run.sh
 delete mode 100644 eigen/bench/perf_monitoring/runall.sh
 delete mode 100644 eigen/bench/perf_monitoring/trmv_lo.cpp
 delete mode 100644 eigen/bench/perf_monitoring/trmv_lot.cpp
 delete mode 100644 eigen/bench/perf_monitoring/trmv_up.cpp
 delete mode 100644 eigen/bench/perf_monitoring/trmv_upt.cpp
 create mode 100644 eigen/cmake/FindBLASEXT.cmake
 create mode 100644 eigen/cmake/FindHWLOC.cmake
 create mode 100644 eigen/cmake/FindPTSCOTCH.cmake
 delete mode 100644 eigen/cmake/FindXsmm.cmake
 delete mode 100644 eigen/scripts/eigen_monitor_perf.sh
 delete mode 100644 eigen/test/indexed_view.cpp
 create mode 100644 eigen/test/numext.cpp
 delete mode 100644 eigen/test/symbolic_index.cpp
 delete mode 100644 eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h
 delete mode 100644 eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
 delete mode 100644 eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclFunctors.h
 delete mode 100644 eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadCancel.h
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp

diff --git a/eigen/.hgignore b/eigen/.hgignore
index dcd9f44..769a47f 100644
--- a/eigen/.hgignore
+++ b/eigen/.hgignore
@@ -28,11 +28,7 @@ activity.png
 *.rej
 log
 patch
-*.patch
 a
 a.*
 lapack/testing
 lapack/reference
-.*project
-.settings
-Makefile
diff --git a/eigen/.hgtags b/eigen/.hgtags
index 7036de1..32ec946 100644
--- a/eigen/.hgtags +++ b/eigen/.hgtags @@ -27,3 +27,7 @@ ce5a455b34c0a0ac3545a1497cb4a16c38ed90e8 3.3-beta1 69d418c0699907bcd0bf9e0b3ba0a112ed091d85 3.3-beta2 bef509908b9da05d0d07ffc0da105e2c8c6d3996 3.3-rc1 04ab5fa4b241754afcf631117572276444c67239 3.3-rc2 +26667be4f70baf4f0d39e96f330714c87b399090 3.3.0 +f562a193118d4f40514e2f4a0ace6e974926ef06 3.3.1 +da9b4e14c2550e0d11078a3c39e6d56eba9905df 3.3.2 +67e894c6cd8f5f1f604b27d37ed47fdf012674ff 3.3.3 diff --git a/eigen/CMakeLists.txt b/eigen/CMakeLists.txt index fe4227c..f584002 100644 --- a/eigen/CMakeLists.txt +++ b/eigen/CMakeLists.txt @@ -28,7 +28,7 @@ endif() ############################################################################# -# retrieve version information # +# retrieve version infomation # ############################################################################# # automatically parse the version number @@ -416,15 +416,16 @@ add_subdirectory(Eigen) add_subdirectory(doc EXCLUDE_FROM_ALL) -option(BUILD_TESTING "Enable creation of Eigen tests." ON) -if(BUILD_TESTING) - include(EigenConfigureTesting) +include(EigenConfigureTesting) - if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) - add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest - else() - add_subdirectory(test EXCLUDE_FROM_ALL) - endif() +# fixme, not sure this line is still needed: +enable_testing() # must be called from the root CMakeLists, see man page + + +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest +else() + add_subdirectory(test EXCLUDE_FROM_ALL) endif() if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) @@ -460,9 +461,7 @@ endif(NOT WIN32) configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY) -if(BUILD_TESTING) - ei_testing_print_summary() -endif() +ei_testing_print_summary() message(STATUS "") message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") diff --git a/eigen/CTestConfig.cmake b/eigen/CTestConfig.cmake index 4c00278..755b473 100644 --- a/eigen/CTestConfig.cmake +++ b/eigen/CTestConfig.cmake @@ -4,14 +4,10 @@ ## # The following are required to uses Dart and the Cdash dashboard ## ENABLE_TESTING() ## INCLUDE(CTest) -set(CTEST_PROJECT_NAME "Eigen") +set(CTEST_PROJECT_NAME "Eigen3.3") set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC") set(CTEST_DROP_METHOD "http") set(CTEST_DROP_SITE "manao.inria.fr") -set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen") +set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3") set(CTEST_DROP_SITE_CDASH TRUE) -set(CTEST_PROJECT_SUBPROJECTS -Official -Unsupported -) diff --git a/eigen/Eigen/Core b/eigen/Eigen/Core index d188356..0f7fa63 100644 --- a/eigen/Eigen/Core +++ b/eigen/Eigen/Core @@ -43,8 +43,10 @@ #else #define EIGEN_DEVICE_FUNC #endif + #else #define EIGEN_DEVICE_FUNC + #endif // When compiling CUDA device code with NVCC, pull in math functions from the @@ -141,24 +143,15 @@ #endif #ifdef __AVX2__ #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 #endif #ifdef __FMA__ #define EIGEN_VECTORIZE_FMA #endif - #if defined(__AVX512F__) + #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) #define EIGEN_VECTORIZE_AVX512 #define EIGEN_VECTORIZE_AVX2 #define EIGEN_VECTORIZE_AVX #define EIGEN_VECTORIZE_FMA - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif @@ -290,15 +283,6 @@ 
#include #endif -#if defined(__SYCL_DEVICE_ONLY__) - #undef min - #undef max - #undef isnan - #undef isinf - #undef isfinite - #include -#endif - /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { @@ -363,9 +347,6 @@ using std::ptrdiff_t; #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" -#include "src/Core/util/IntegralConstant.h" -#include "src/Core/util/SymbolicIndex.h" - #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" @@ -376,8 +357,6 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX512/PacketMath.h" - #include "src/Core/arch/SSE/MathFunctions.h" - #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" #elif defined EIGEN_VECTORIZE_AVX // Use AVX for floats and doubles, SSE for integers @@ -430,8 +409,6 @@ using std::ptrdiff_t; // on CUDA devices #include "src/Core/arch/CUDA/Complex.h" -#include "src/Core/util/IndexedViewHelper.h" -#include "src/Core/ArithmeticSequence.h" #include "src/Core/IO.h" #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" @@ -473,7 +450,6 @@ using std::ptrdiff_t; #include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/IndexedView.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" diff --git a/eigen/Eigen/Geometry b/eigen/Eigen/Geometry index 131a4ed..716d529 100644 --- a/eigen/Eigen/Geometry +++ b/eigen/Eigen/Geometry @@ -59,3 +59,4 @@ #endif // EIGEN_GEOMETRY_MODULE_H /* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/eigen/Eigen/src/Cholesky/LDLT.h b/eigen/Eigen/src/Cholesky/LDLT.h index 9b4fdb4..fcee7b2 100644 --- a/eigen/Eigen/src/Cholesky/LDLT.h +++ b/eigen/Eigen/src/Cholesky/LDLT.h @@ -258,6 +258,7 @@ template class LDLT #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/Cholesky/LLT.h b/eigen/Eigen/src/Cholesky/LLT.h index e6c02d8..87ca8d4 100644 --- a/eigen/Eigen/src/Cholesky/LLT.h +++ b/eigen/Eigen/src/Cholesky/LLT.h @@ -200,6 +200,7 @@ template class LLT #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h index 61faf43..5719720 100644 --- a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -32,7 +32,7 @@ template<> struct cholmod_configure_matrix > { } }; -// Other scalar types are not yet supported by Cholmod +// Other scalar types are not yet suppotred by Cholmod // template<> struct cholmod_configure_matrix { // template // static void run(CholmodType& mat) { @@ -124,9 +124,6 @@ cholmod_sparse viewAsCholmod(const SparseSelfAdjointView::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - if(_Options & RowMajorBit) res.stype *=-1; return res; } @@ -162,44 +159,6 @@ MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } -namespace internal { - -// template specializations for int and long that call the correct cholmod method - -#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ - template ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ - 
template<> ret cm_ ## name (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } - -#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ - template ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ - template<> ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } - -EIGEN_CHOLMOD_SPECIALIZE0(int, start) -EIGEN_CHOLMOD_SPECIALIZE0(int, finish) - -EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) - -EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) - -template cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } -template<> cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } - -template cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } -template<> cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } - -template -int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } -template<> -int cm_factorize_p (cholmod_sparse* A, double beta[2], long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } - -#undef EIGEN_CHOLMOD_SPECIALIZE0 -#undef EIGEN_CHOLMOD_SPECIALIZE1 - -} // namespace internal - - enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; @@ -236,7 +195,7 @@ class CholmodBase : public SparseSolverBase { EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start(m_cholmod); + cholmod_start(&m_cholmod); } explicit CholmodBase(const MatrixType& matrix) @@ -244,15 +203,15 @@ class CholmodBase : public SparseSolverBase { EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start(m_cholmod); + cholmod_start(&m_cholmod); compute(matrix); } ~CholmodBase() { if(m_cholmodFactor) - internal::cm_free_factor(m_cholmodFactor, m_cholmod); - internal::cm_finish(m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); } inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } @@ -260,7 +219,7 @@ class CholmodBase : public SparseSolverBase /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was successful, + * \returns \c Success if computation was succesful, * \c NumericalIssue if the matrix.appears to be negative. 
*/ ComputationInfo info() const @@ -287,11 +246,11 @@ class CholmodBase : public SparseSolverBase { if(m_cholmodFactor) { - internal::cm_free_factor(m_cholmodFactor, m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); m_cholmodFactor = 0; } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - m_cholmodFactor = internal::cm_analyze(A, m_cholmod); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); this->m_isInitialized = true; this->m_info = Success; @@ -309,7 +268,7 @@ class CholmodBase : public SparseSolverBase { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - internal::cm_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); @@ -330,20 +289,19 @@ class CholmodBase : public SparseSolverBase EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. Ref > b_ref(b.derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = internal::cm_solve(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); - internal::cm_free_dense(x_cd, m_cholmod); + cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ @@ -358,16 +316,15 @@ class CholmodBase : public SparseSolverBase // note: cs stands for Cholmod Sparse Ref > b_ref(b.const_cast_derived()); cholmod_sparse b_cs = viewAsCholmod(b_ref); - cholmod_sparse* x_cs = internal::cm_spsolve(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver) dest.derived() = viewAsEigen(*x_cs); - internal::cm_free_sparse(x_cs, m_cholmod); + cholmod_free_sparse(&x_cs, &m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/eigen/Eigen/src/Core/ArithmeticSequence.h b/eigen/Eigen/src/Core/ArithmeticSequence.h deleted file mode 100644 index ada1571..0000000 --- a/eigen/Eigen/src/Core/ArithmeticSequence.h +++ /dev/null @@ -1,350 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_ARITHMETIC_SEQUENCE_H -#define EIGEN_ARITHMETIC_SEQUENCE_H - -namespace Eigen { - -namespace internal { - -#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) -template struct aseq_negate {}; - -template<> struct aseq_negate { - typedef Index type; -}; - -template struct aseq_negate > { - typedef FixedInt<-N> type; -}; - -// Compilation error in the following case: -template<> struct aseq_negate > {}; - -template::value, - bool SizeIsSymbolic =Symbolic::is_symbolic::value> -struct aseq_reverse_first_type { - typedef Index type; -}; - -template -struct aseq_reverse_first_type { - typedef Symbolic::AddExpr > >, - Symbolic::ValueExpr > - > type; -}; - -template -struct aseq_reverse_first_type_aux { - typedef Index type; -}; - -template -struct aseq_reverse_first_type_aux::type> { - typedef FixedInt<(SizeType::value-1)*IncrType::value> type; -}; - -template -struct aseq_reverse_first_type { - typedef typename aseq_reverse_first_type_aux::type Aux; - typedef Symbolic::AddExpr > type; -}; - -template -struct aseq_reverse_first_type { - typedef Symbolic::AddExpr > >, - Symbolic::ValueExpr >, - Symbolic::ValueExpr<> > type; -}; -#endif - -// Helper to cleanup the type of the increment: -template struct cleanup_seq_incr { - typedef typename cleanup_index_type::type type; -}; - -} - -//-------------------------------------------------------------------------------- -// seq(first,last,incr) and seqN(first,size,incr) -//-------------------------------------------------------------------------------- - -template > -class ArithmeticSequence; - -template -ArithmeticSequence::type, - typename internal::cleanup_index_type::type, - typename internal::cleanup_seq_incr::type > -seqN(FirstType first, SizeType size, IncrType incr); - -/** \class ArithmeticSequence - * \ingroup Core_Module - * - * This class represents an arithmetic progression \f$ a_0, a_1, a_2, ..., a_{n-1}\f$ defined by - * its \em first value \f$ a_0 \f$, its \em size (aka length) \em n, and the \em increment (aka stride) - * that is equal to \f$ a_{i+1}-a_{i}\f$ for any \em i. - * - * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments - * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the - * only way it is used. - * - * \tparam FirstType type of the first element, usually an Index, - * but internally it can be a symbolic expression - * \tparam SizeType type representing the size of the sequence, usually an Index - * or a compile time integral constant. 
Internally, it can also be a symbolic expression - * \tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is compile-time 1) - * - * \sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView - */ -template -class ArithmeticSequence -{ -public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} - - enum { - SizeAtCompileTime = internal::get_fixed_value::value, - IncrAtCompileTime = internal::get_fixed_value::value - }; - - /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } - - /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } - - /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */ - Index operator[](Index i) const { return m_first + i * m_incr; } - - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } - -protected: - FirstType m_first; - SizeType m_size; - IncrType m_incr; - -public: - -#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) - auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#else -protected: - typedef typename internal::aseq_negate::type ReverseIncrType; - typedef typename internal::aseq_reverse_first_type::type ReverseFirstType; -public: - ArithmeticSequence - reverse() const { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#endif -}; - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ -template -ArithmeticSequence::type,typename internal::cleanup_index_type::type,typename internal::cleanup_seq_incr::type > -seqN(FirstType first, SizeType size, IncrType incr) { - return ArithmeticSequence::type,typename internal::cleanup_index_type::type,typename internal::cleanup_seq_incr::type>(first,size,incr); -} - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and unit increment - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */ -template -ArithmeticSequence::type,typename internal::cleanup_index_type::type > -seqN(FirstType first, SizeType size) { - return ArithmeticSequence::type,typename internal::cleanup_index_type::type>(first,size); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr - * - * It is essentially an alias to: - * \code - * seqN(f, (l-f+incr)/incr, incr); - * \endcode - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) - */ -template -auto seq(FirstType f, LastType l, IncrType incr); - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment - * - * It is essentially an alias to: - * \code - * seqN(f,l-f+1); - * \endcode - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) - */ -template -auto seq(FirstType f, LastType l); - -#else // EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX11 -template -auto seq(FirstType f, LastType l) -> decltype(seqN(typename 
internal::cleanup_index_type::type(f), - ( typename internal::cleanup_index_type::type(l) - - typename internal::cleanup_index_type::type(f)+fix<1>()))) -{ - return seqN(typename internal::cleanup_index_type::type(f), - (typename internal::cleanup_index_type::type(l) - -typename internal::cleanup_index_type::type(f)+fix<1>())); -} - -template -auto seq(FirstType f, LastType l, IncrType incr) - -> decltype(seqN(typename internal::cleanup_index_type::type(f), - ( typename internal::cleanup_index_type::type(l) - - typename internal::cleanup_index_type::type(f)+typename internal::cleanup_seq_incr::type(incr) - ) / typename internal::cleanup_seq_incr::type(incr), - typename internal::cleanup_seq_incr::type(incr))) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type::type(f), - ( typename internal::cleanup_index_type::type(l) - -typename internal::cleanup_index_type::type(f)+CleanedIncrType(incr)) / CleanedIncrType(incr), - CleanedIncrType(incr)); -} -#else - -template -typename internal::enable_if::value || Symbolic::is_symbolic::value), - ArithmeticSequence::type,Index> >::type -seq(FirstType f, LastType l) -{ - return seqN(typename internal::cleanup_index_type::type(f), - Index((typename internal::cleanup_index_type::type(l)-typename internal::cleanup_index_type::type(f)+fix<1>()))); -} - -template -typename internal::enable_if::value, - ArithmeticSequence,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr > > > >::type -seq(const Symbolic::BaseExpr &f, LastType l) -{ - return seqN(f.derived(),(typename internal::cleanup_index_type::type(l)-f.derived()+fix<1>())); -} - -template -typename internal::enable_if::value, - ArithmeticSequence::type, - Symbolic::AddExpr >, - Symbolic::ValueExpr > > > >::type -seq(FirstType f, const Symbolic::BaseExpr &l) -{ - return seqN(typename internal::cleanup_index_type::type(f),(l.derived()-typename internal::cleanup_index_type::type(f)+fix<1>())); -} - -template -ArithmeticSequence >,Symbolic::ValueExpr > > > -seq(const Symbolic::BaseExpr &f, const Symbolic::BaseExpr &l) -{ - return seqN(f.derived(),(l.derived()-f.derived()+fix<1>())); -} - - -template -typename internal::enable_if::value || Symbolic::is_symbolic::value), - ArithmeticSequence::type,Index,typename internal::cleanup_seq_incr::type> >::type -seq(FirstType f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type::type(f), - Index((typename internal::cleanup_index_type::type(l)-typename internal::cleanup_index_type::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr); -} - -template -typename internal::enable_if::value, - ArithmeticSequence, - Symbolic::ValueExpr<> >, - Symbolic::ValueExpr::type> >, - Symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> >::type -seq(const Symbolic::BaseExpr &f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(f.derived(),(typename internal::cleanup_index_type::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template -typename internal::enable_if::value, - ArithmeticSequence::type, - Symbolic::QuotientExpr >, - Symbolic::ValueExpr::type> >, - Symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> >::type -seq(FirstType f, const Symbolic::BaseExpr &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(typename 
internal::cleanup_index_type::type(f), - (l.derived()-typename internal::cleanup_index_type::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template -ArithmeticSequence >, - Symbolic::ValueExpr::type> >, - Symbolic::ValueExpr::type> >, - typename internal::cleanup_seq_incr::type> -seq(const Symbolic::BaseExpr &f, const Symbolic::BaseExpr &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr::type CleanedIncrType; - return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} -#endif - -#endif // EIGEN_PARSED_BY_DOXYGEN - -namespace internal { - -// Convert a symbolic span into a usable one (i.e., remove last/end "keywords") -template -struct make_size_type { - typedef typename internal::conditional::value, Index, T>::type type; -}; - -template -struct IndexedViewCompatibleType, XprSize> { - typedef ArithmeticSequence::type,IncrType> type; -}; - -template -ArithmeticSequence::type,IncrType> -makeIndexedViewCompatible(const ArithmeticSequence& ids, Index size,SpecializedType) { - return ArithmeticSequence::type,IncrType>( - eval_expr_given_size(ids.firstObject(),size),eval_expr_given_size(ids.sizeObject(),size),ids.incrObject()); -} - -template -struct get_compile_time_incr > { - enum { value = get_fixed_value::value }; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ARITHMETIC_SEQUENCE_H diff --git a/eigen/Eigen/src/Core/Array.h b/eigen/Eigen/src/Core/Array.h index 0d34269..e10020d 100644 --- a/eigen/Eigen/src/Core/Array.h +++ b/eigen/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase&) */ template EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase &other) + EIGEN_STRONG_INLINE Array(const EigenBase &other, + typename internal::enable_if::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/eigen/Eigen/src/Core/ArrayBase.h b/eigen/Eigen/src/Core/ArrayBase.h index 9da960f..3dbc708 100644 --- a/eigen/Eigen/src/Core/ArrayBase.h +++ b/eigen/Eigen/src/Core/ArrayBase.h @@ -69,7 +69,6 @@ template class ArrayBase using Base::coeff; using Base::coeffRef; using Base::lazyAssign; - using Base::operator-; using Base::operator=; using Base::operator+=; using Base::operator-=; @@ -89,6 +88,7 @@ template class ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/ArrayWrapper.h b/eigen/Eigen/src/Core/ArrayWrapper.h index a04521a..688aadd 100644 --- a/eigen/Eigen/src/Core/ArrayWrapper.h +++ b/eigen/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? 
LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/eigen/Eigen/src/Core/Assign.h b/eigen/Eigen/src/Core/Assign.h index 655412e..53806ba 100644 --- a/eigen/Eigen/src/Core/Assign.h +++ b/eigen/Eigen/src/Core/Assign.h @@ -16,7 +16,7 @@ namespace Eigen { template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase +EIGEN_STRONG_INLINE Derived& DenseBase ::lazyAssign(const DenseBase& other) { enum{ diff --git a/eigen/Eigen/src/Core/BooleanRedux.h b/eigen/Eigen/src/Core/BooleanRedux.h index ccf5190..8409d87 100644 --- a/eigen/Eigen/src/Core/BooleanRedux.h +++ b/eigen/Eigen/src/Core/BooleanRedux.h @@ -14,54 +14,56 @@ namespace Eigen { namespace internal { -template +template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return all_unroller::run(mat) && mat.coeff(row, col); + return all_unroller::run(mat) && mat.coeff(row, col); } }; -template -struct all_unroller +template +struct all_unroller { static inline bool run(const Derived &/*mat*/) { return true; } }; -template -struct all_unroller +template +struct all_unroller { static inline bool run(const Derived &) { return false; } }; -template +template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return any_unroller::run(mat) || mat.coeff(row, col); + return any_unroller::run(mat) || mat.coeff(row, col); } }; -template -struct any_unroller +template +struct any_unroller { static inline bool run(const Derived & /*mat*/) { return false; } }; -template -struct any_unroller +template +struct any_unroller { static inline bool run(const Derived &) { return false; } }; @@ -76,7 +78,7 @@ struct any_unroller * \sa any(), Cwise::operator<() */ template -EIGEN_DEVICE_FUNC inline bool DenseBase::all() const +inline bool DenseBase::all() const { typedef internal::evaluator Evaluator; enum { @@ -85,7 +87,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::all() const }; Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -100,7 +102,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::all() const * \sa all() */ template -EIGEN_DEVICE_FUNC inline bool DenseBase::any() const +inline bool DenseBase::any() const { typedef internal::evaluator Evaluator; enum { @@ -109,7 +111,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -124,7 +126,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase::any() const * \sa all(), any() */ template -EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase::count() const +inline Eigen::Index DenseBase::count() const { return derived().template cast().template cast().sum(); } diff --git a/eigen/Eigen/src/Core/CommaInitializer.h b/eigen/Eigen/src/Core/CommaInitializer.h index 35fdbb8..d218e98 100644 --- 
a/eigen/Eigen/src/Core/CommaInitializer.h +++ b/eigen/Eigen/src/Core/CommaInitializer.h @@ -141,7 +141,7 @@ struct CommaInitializer * \sa CommaInitializer::finished(), class CommaInitializer */ template -EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator<< (const Scalar& s) +inline CommaInitializer DenseBase::operator<< (const Scalar& s) { return CommaInitializer(*static_cast(this), s); } @@ -149,7 +149,7 @@ EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator< /** \sa operator<<(const Scalar&) */ template template -EIGEN_DEVICE_FUNC inline CommaInitializer +inline CommaInitializer DenseBase::operator<<(const DenseBase& other) { return CommaInitializer(*static_cast(this), other); diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index 15b361b..f7c1eff 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator // ---------- base class for all evaluators ---------- template -struct evaluator_base +struct evaluator_base : public noncopyable { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits ExpressionTraits; @@ -114,14 +114,6 @@ struct evaluator_base enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} -private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -131,27 +123,6 @@ private: // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. -// this helper permits to completely eliminate m_outerStride if it is known at compiletime. -template class plainobjectbase_evaluator_data { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) - { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); - eigen_internal_assert(outerStride==OuterStride); - } - EIGEN_DEVICE_FUNC Index outerStride() const { return OuterStride; } - const Scalar *data; -}; - -template class plainobjectbase_evaluator_data { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC Index outerStride() const { return m_outerStride; } - const Scalar *data; -protected: - Index m_outerStride; -}; - template struct evaluator > : evaluator_base @@ -170,21 +141,18 @@ struct evaluator > Flags = traits::EvaluatorFlags, Alignment = traits::Alignment }; - enum { - // We do not need to know the outer stride for vectors - OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime - }; - + EIGEN_DEVICE_FUNC evaluator() - : m_d(0,OuterStrideAtCompileTime) + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_d(m.data(),IsVectorAtCompileTime ? 
0 : m.outerStride()) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -193,30 +161,30 @@ struct evaluator > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; + return m_data[row * m_outerStride.value() + col]; else - return m_d.data[row + col * m_d.outerStride()]; + return m_data[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.data[index]; + return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast(m_d.data)[row * m_d.outerStride() + col]; + return const_cast(m_data)[row * m_outerStride.value() + col]; else - return const_cast(m_d.data)[row + col * m_d.outerStride()]; + return const_cast(m_data)[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast(m_d.data)[index]; + return const_cast(m_data)[index]; } template @@ -224,16 +192,16 @@ struct evaluator > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt(m_d.data + row * m_d.outerStride() + col); + return ploadt(m_data + row * m_outerStride.value() + col); else - return ploadt(m_d.data + row + col * m_d.outerStride()); + return ploadt(m_data + row + col * m_outerStride.value()); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt(m_d.data + index); + return ploadt(m_data + index); } template @@ -242,22 +210,26 @@ struct evaluator > { if (IsRowMajor) return pstoret - (const_cast(m_d.data) + row * m_d.outerStride() + col, x); + (const_cast(m_data) + row * m_outerStride.value() + col, x); else return pstoret - (const_cast(m_d.data) + row + col * m_d.outerStride(), x); + (const_cast(m_data) + row + col * m_outerStride.value(), x); } template EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret(const_cast(m_d.data) + index, x); + return pstoret(const_cast(m_data) + index, x); } protected: + const Scalar *m_data; - plainobjectbase_evaluator_data m_d; + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; }; template @@ -555,7 +527,9 @@ struct unary_evaluator, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) : m_d(op) + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -566,43 +540,32 @@ struct unary_evaluator, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_functor(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_functor(m_argImpl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.argImpl.template packet(row, col)); + return m_functor.packetOp(m_argImpl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.argImpl.template packet(index)); + return m_functor.packetOp(m_argImpl.template packet(index)); } protected: - - // this 
helper permits to completely eliminate the functor if it is empty - class Data : private UnaryOp - { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast(*this); } - evaluator argImpl; - }; - - Data m_d; + const UnaryOp m_functor; + evaluator m_argImpl; }; // -------------------- CwiseTernaryOp -------------------- @@ -646,7 +609,11 @@ struct ternary_evaluator, IndexBased evaluator::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -657,47 +624,38 @@ struct ternary_evaluator, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet(row, col), - m_d.arg2Impl.template packet(row, col), - m_d.arg3Impl.template packet(row, col)); + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet(index), - m_d.arg2Impl.template packet(index), - m_d.arg3Impl.template packet(index)); + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); } protected: - // this helper permits to completely eliminate the functor if it is empty - struct Data : private TernaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TernaryOp& func() const { return static_cast(*this); } - evaluator arg1Impl; - evaluator arg2Impl; - evaluator arg3Impl; - }; - - Data m_d; + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; }; // -------------------- CwiseBinaryOp -------------------- @@ -738,7 +696,10 @@ struct binary_evaluator, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -749,45 +710,35 @@ struct binary_evaluator, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType 
coeff(Index row, Index col) const { - return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet(row, col), - m_d.rhsImpl.template packet(row, col)); + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); } template EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet(index), - m_d.rhsImpl.template packet(index)); + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private BinaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const BinaryOp& func() const { return static_cast(*this); } - evaluator lhsImpl; - evaluator rhsImpl; - }; - - Data m_d; + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; }; // -------------------- CwiseUnaryView -------------------- @@ -806,7 +757,9 @@ struct unary_evaluator, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -818,40 +771,30 @@ struct unary_evaluator, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_unaryOp(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_unaryOp(m_argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_d.func()(m_d.argImpl.coeffRef(row, col)); + return m_unaryOp(m_argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_d.func()(m_d.argImpl.coeffRef(index)); + return m_unaryOp(m_argImpl.coeffRef(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private UnaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast(*this); } - evaluator argImpl; - }; - - Data m_d; + const UnaryOp m_unaryOp; + evaluator m_argImpl; }; // -------------------- Map -------------------- diff --git a/eigen/Eigen/src/Core/CoreIterators.h b/eigen/Eigen/src/Core/CoreIterators.h index b967196..4eb42b9 100644 --- a/eigen/Eigen/src/Core/CoreIterators.h +++ b/eigen/Eigen/src/Core/CoreIterators.h @@ -48,11 +48,6 @@ public: * Explicit zeros are not skipped over. 
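
The evaluator hunks above replace the removed `Data : private UnaryOp` helper (whose comment read "this helper permits to completely eliminate the functor if it is empty") with plain `m_functor`/`m_argImpl` members. The helper relied on the empty-base optimization so that a stateless functor adds no storage. A minimal standalone sketch of that trade-off, with purely illustrative names (`Functor`, `ArgEvaluator`, `WithMember`, `WithEBO`):

    // Empty-base optimization: an empty functor stored as a base costs no space,
    // while the same functor stored as a member costs at least one padded byte.
    #include <iostream>

    struct Functor { double operator()(double x) const { return 2.0 * x; } };  // stateless
    struct ArgEvaluator { const double* data; };

    struct WithMember { Functor functor; ArgEvaluator arg; };  // what the new code stores
    struct WithEBO : private Functor {                         // what the removed helper did
      ArgEvaluator arg;
      const Functor& func() const { return *this; }
    };

    int main() {
      std::cout << sizeof(WithMember) << " vs " << sizeof(WithEBO) << '\n';  // typically 16 vs 8 on x86-64
    }
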
To skip explicit zeros, see class SparseView */ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } - EIGEN_STRONG_INLINE InnerIterator& operator+=(Index i) { m_iter.operator+=(i); return *this; } - EIGEN_STRONG_INLINE InnerIterator operator+(Index i) - { InnerIterator result(*this); result+=i; return result; } - - /// \returns the column or row index of the current coefficient. EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } /// \returns the row index of the current coefficient. diff --git a/eigen/Eigen/src/Core/CwiseBinaryOp.h b/eigen/Eigen/src/Core/CwiseBinaryOp.h index bf2632d..a36765e 100644 --- a/eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -158,7 +158,7 @@ public: */ template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); @@ -171,7 +171,7 @@ MatrixBase::operator-=(const MatrixBase &other) */ template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); @@ -181,3 +181,4 @@ MatrixBase::operator+=(const MatrixBase& other) } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/eigen/Eigen/src/Core/CwiseNullaryOp.h b/eigen/Eigen/src/Core/CwiseNullaryOp.h index 144608e..ddd607e 100644 --- a/eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -131,7 +131,7 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f */ template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -170,7 +170,7 @@ DenseBase::NullaryExpr(const CustomNullaryOp& func) * \sa class CwiseNullaryOp */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(Index rows, Index cols, const Scalar& value) { return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); diff --git a/eigen/Eigen/src/Core/DenseBase.h b/eigen/Eigen/src/Core/DenseBase.h index fd933ee..90066ae 100644 --- a/eigen/Eigen/src/Core/DenseBase.h +++ b/eigen/Eigen/src/Core/DenseBase.h @@ -570,17 +570,13 @@ template class DenseBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) -#define EIGEN_DOC_UNARY_ADDONS(X,Y) -# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/BlockMethods.h" -# include "../plugins/IndexedViewMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF -#undef EIGEN_DOC_UNARY_ADDONS // disable the use of evalTo for dense objects with a nice compilation error template diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index c62f5ff..49e7112 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -184,7 +184,7 @@ template class Diagonal * * \sa class Diagonal */ template -EIGEN_DEVICE_FUNC inline 
typename MatrixBase::DiagonalReturnType +inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { return DiagonalReturnType(derived()); @@ -192,7 +192,7 @@ MatrixBase::diagonal() /** This is the const version of diagonal(). */ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalReturnType +inline typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -210,7 +210,7 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalDynamicIndexReturnType +inline typename MatrixBase::DiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) { return DiagonalDynamicIndexReturnType(derived(), index); @@ -218,7 +218,7 @@ MatrixBase::diagonal(Index index) /** This is the const version of diagonal(Index). */ template -EIGEN_DEVICE_FUNC inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) const { return ConstDiagonalDynamicIndexReturnType(derived(), index); @@ -237,7 +237,6 @@ MatrixBase::diagonal(Index index) const * \sa MatrixBase::diagonal(), class Diagonal */ template template -EIGEN_DEVICE_FUNC inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { @@ -247,7 +246,6 @@ MatrixBase::diagonal() /** This is the const version of diagonal(). */ template template -EIGEN_DEVICE_FUNC inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { diff --git a/eigen/Eigen/src/Core/DiagonalMatrix.h b/eigen/Eigen/src/Core/DiagonalMatrix.h index 4e8297e..ecfdce8 100644 --- a/eigen/Eigen/src/Core/DiagonalMatrix.h +++ b/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -273,7 +273,7 @@ class DiagonalWrapper * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template -EIGEN_DEVICE_FUNC inline const DiagonalWrapper +inline const DiagonalWrapper MatrixBase::asDiagonal() const { return DiagonalWrapper(derived()); diff --git a/eigen/Eigen/src/Core/DiagonalProduct.h b/eigen/Eigen/src/Core/DiagonalProduct.h index 7911d1c..d372b93 100644 --- a/eigen/Eigen/src/Core/DiagonalProduct.h +++ b/eigen/Eigen/src/Core/DiagonalProduct.h @@ -17,7 +17,7 @@ namespace Eigen { */ template template -EIGEN_DEVICE_FUNC inline const Product +inline const Product MatrixBase::operator*(const DiagonalBase &a_diagonal) const { return Product(derived(),a_diagonal.derived()); diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index bb8e3fe..06ef18b 100644 --- a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -90,7 +90,7 @@ MatrixBase::dot(const MatrixBase& other) const * \sa dot(), norm(), lpNorm() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { return numext::real((*this).cwiseAbs2().sum()); } @@ -102,7 +102,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits -EIGEN_DEVICE_FUNC inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +inline typename NumTraits::Scalar>::Real MatrixBase::norm() const 
{ return numext::sqrt(squaredNorm()); } @@ -117,7 +117,7 @@ EIGEN_DEVICE_FUNC inline typename NumTraits:: * \sa norm(), normalize() */ template -EIGEN_DEVICE_FUNC inline const typename MatrixBase::PlainObject +inline const typename MatrixBase::PlainObject MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; @@ -139,7 +139,7 @@ MatrixBase::normalized() const * \sa norm(), normalized() */ template -EIGEN_DEVICE_FUNC inline void MatrixBase::normalize() +inline void MatrixBase::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +160,7 @@ EIGEN_DEVICE_FUNC inline void MatrixBase::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template -EIGEN_DEVICE_FUNC inline const typename MatrixBase::PlainObject +inline const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { typedef typename internal::nested_eval::type _Nested; @@ -185,7 +185,7 @@ MatrixBase::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template -EIGEN_DEVICE_FUNC inline void MatrixBase::stableNormalize() +inline void MatrixBase::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); @@ -257,9 +257,9 @@ struct lpNorm_selector template template #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_DEVICE_FUNC inline typename NumTraits::Scalar>::Real +inline typename NumTraits::Scalar>::Real #else -EIGEN_DEVICE_FUNC MatrixBase::RealScalar +MatrixBase::RealScalar #endif MatrixBase::lpNorm() const { diff --git a/eigen/Eigen/src/Core/EigenBase.h b/eigen/Eigen/src/Core/EigenBase.h index ccc122c..b195506 100644 --- a/eigen/Eigen/src/Core/EigenBase.h +++ b/eigen/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). 
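
The `stableNormalized()`/`stableNormalize()` bodies above first divide by the largest absolute coefficient (`w = cwiseAbs().maxCoeff()`) before squaring, so the squared norm can neither overflow nor underflow. A rough standalone sketch of the same idea, not the Eigen implementation and ignoring the zero and non-finite cases (`stable_normalized_sketch` is a made-up name):

    // Overflow-safe normalization, mirroring the w / z steps quoted above.
    #include <Eigen/Dense>
    #include <cmath>

    Eigen::VectorXd stable_normalized_sketch(const Eigen::VectorXd& v) {
      double w = v.cwiseAbs().maxCoeff();   // scale so the largest entry has magnitude 1
      double z = (v / w).squaredNorm();     // safely representable after scaling
      return v / (w * std::sqrt(z));        // == v / v.norm(), computed without overflow
    }
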
* diff --git a/eigen/Eigen/src/Core/Fuzzy.h b/eigen/Eigen/src/Core/Fuzzy.h index 43aa49b..3e403a0 100644 --- a/eigen/Eigen/src/Core/Fuzzy.h +++ b/eigen/Eigen/src/Core/Fuzzy.h @@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector */ template template -EIGEN_DEVICE_FUNC bool DenseBase::isApprox( +bool DenseBase::isApprox( const DenseBase& other, const RealScalar& prec ) const @@ -122,7 +122,7 @@ EIGEN_DEVICE_FUNC bool DenseBase::isApprox( * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const */ template -EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan( +bool DenseBase::isMuchSmallerThan( const typename NumTraits::Real& other, const RealScalar& prec ) const @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan( */ template template -EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan( +bool DenseBase::isMuchSmallerThan( const DenseBase& other, const RealScalar& prec ) const diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index b206b0a..0f16cd8 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -428,7 +428,7 @@ MatrixBase::operator*(const MatrixBase &other) const template template const Product -EIGEN_DEVICE_FUNC MatrixBase::lazyProduct(const MatrixBase &other) const +MatrixBase::lazyProduct(const MatrixBase &other) const { enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic diff --git a/eigen/Eigen/src/Core/GenericPacketMath.h b/eigen/Eigen/src/Core/GenericPacketMath.h index d19d5bb..029f8ac 100644 --- a/eigen/Eigen/src/Core/GenericPacketMath.h +++ b/eigen/Eigen/src/Core/GenericPacketMath.h @@ -61,7 +61,6 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, - HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -402,10 +401,6 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } -/** \internal \returns the expm1 of \a a (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pexpm1(const Packet& a) { return numext::expm1(a); } - /** \internal \returns the log of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } diff --git a/eigen/Eigen/src/Core/GlobalFunctions.h b/eigen/Eigen/src/Core/GlobalFunctions.h index 12828a7..769dc25 100644 --- a/eigen/Eigen/src/Core/GlobalFunctions.h +++ b/eigen/Eigen/src/Core/GlobalFunctions.h @@ -71,7 +71,6 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/eigen/Eigen/src/Core/IndexedView.h b/eigen/Eigen/src/Core/IndexedView.h deleted file mode 100644 index 8c57a27..0000000 --- a/eigen/Eigen/src/Core/IndexedView.h +++ /dev/null @@ -1,207 +0,0 @@ -// 
This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INDEXED_VIEW_H -#define EIGEN_INDEXED_VIEW_H - -namespace Eigen { - -namespace internal { - -template -struct traits > - : traits -{ - enum { - RowsAtCompileTime = int(array_size::value), - ColsAtCompileTime = int(array_size::value), - MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : int(traits::MaxRowsAtCompileTime), - MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits::MaxColsAtCompileTime), - - XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, - IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : XprTypeIsRowMajor, - - RowIncr = int(get_compile_time_incr::value), - ColIncr = int(get_compile_time_incr::value), - InnerIncr = IsRowMajor ? ColIncr : RowIncr, - OuterIncr = IsRowMajor ? RowIncr : ColIncr, - - HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), - XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time::ret) : int(outer_stride_at_compile_time::ret), - XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - - InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, - IsBlockAlike = InnerIncr==1 && OuterIncr==1, - IsInnerPannel = HasSameStorageOrderAsXprType && is_same,typename conditional::type>::value, - - InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr, - OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr, - - ReturnAsScalar = is_same::value && is_same::value, - ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, - ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), - - // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, - // but this is too strict regarding negative strides... - DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, - FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (traits::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit - }; - - typedef Block BlockType; -}; - -} - -template -class IndexedViewImpl; - - -/** \class IndexedView - * \ingroup Core_Module - * - * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices - * - * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns - * \tparam RowIndices the type of the object defining the sequence of row indices - * \tparam ColIndices the type of the object defining the sequence of column indices - * - * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection - * of sub-sets of rows and columns, that are themself defined by generic sequences of row indices \f$ \{r_0,r_1,..r_{m-1}\} \f$ - * and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. 
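
The deleted class documented here builds a view from arbitrary row and column index sequences; with row indices r and column indices c, entry (i,j) of the view reads A(r_i, c_j). A toy illustration of just that indexing rule with plain arrays (not Eigen code):

    // B(i,j) = A(r[i], c[j]) for r = {2,0}, c = {1,3}.
    #include <cstdio>

    int main() {
      double A[4][4] = {{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
      int r[2] = {2, 0};
      int c[2] = {1, 3};
      for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 2; ++j)
          std::printf("B(%d,%d) = %g\n", i, j, A[r[i]][c[j]]);  // prints 9, 11, 1, 3
    }
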
Let \f$ A \f$ be the nested matrix, then the resulting matrix \f$ B \f$ has \c m - * rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j) \f$. - * - * The \c RowIndices and \c ColIndices types must be compatible with the following API: - * \code - * operator[](Index) const; - * Index size() const; - * \endcode - * - * Typical supported types thus include: - * - std::vector - * - std::valarray - * - std::array - * - Plain C arrays: int[N] - * - Eigen::ArrayXi - * - decltype(ArrayXi::LinSpaced(...)) - * - Any view/expressions of the previous types - * - Eigen::ArithmeticSequence - * - Eigen::internal::AllRange (helper for Eigen::all) - * - Eigen::internal::SingleRange (helper for single index) - * - etc. - * - * In typical usages of %Eigen, this class should never be used directly. It is the return type of - * DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * \sa class Block - */ -template -class IndexedView : public IndexedViewImpl::StorageKind> -{ -public: - typedef typename IndexedViewImpl::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) - - typedef typename internal::ref_selector::non_const_type MatrixTypeNested; - typedef typename internal::remove_all::type NestedExpression; - - template - IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) - : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) - {} - - /** \returns number of rows */ - Index rows() const { return internal::size(m_rowIndices); } - - /** \returns number of columns */ - Index cols() const { return internal::size(m_colIndices); } - - /** \returns the nested expression */ - const typename internal::remove_all::type& - nestedExpression() const { return m_xpr; } - - /** \returns the nested expression */ - typename internal::remove_reference::type& - nestedExpression() { return m_xpr.const_cast_derived(); } - - /** \returns a const reference to the object storing/generating the row indices */ - const RowIndices& rowIndices() const { return m_rowIndices; } - - /** \returns a const reference to the object storing/generating the column indices */ - const ColIndices& colIndices() const { return m_colIndices; } - -protected: - MatrixTypeNested m_xpr; - RowIndices m_rowIndices; - ColIndices m_colIndices; -}; - - -// Generic API dispatcher -template -class IndexedViewImpl - : public internal::generic_xpr_base >::type -{ -public: - typedef typename internal::generic_xpr_base >::type Base; -}; - -namespace internal { - - -template -struct unary_evaluator, IndexBased> - : evaluator_base > -{ - typedef IndexedView XprType; - - enum { - CoeffReadCost = evaluator::CoeffReadCost /* TODO + cost of row/col index */, - - Flags = (evaluator::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)), - - Alignment = 0 - }; - - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) - { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const - { - return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Scalar& coeffRef(Index row, Index col) - { - return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - -protected: - - evaluator m_argImpl; - const XprType& 
m_xpr; - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_H diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index 5ec6c39..a648aa0 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h +++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -14,6 +14,7 @@ // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: @@ -412,7 +413,7 @@ inline NewType cast(const OldType& x) static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(round); + using std::round; return round(x); } }; @@ -481,55 +482,6 @@ struct arg_retval typedef typename NumTraits::Real type; }; -/**************************************************************************** -* Implementation of expm1 * -****************************************************************************/ - -// This implementation is based on GSL Math's expm1. -namespace std_fallback { - // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, - // or that there is no suitable std::expm1 function available. Implementation - // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. - template - EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - typedef typename NumTraits::Real RealScalar; - - EIGEN_USING_STD_MATH(exp); - Scalar u = exp(x); - if (u == Scalar(1)) { - return x; - } - Scalar um1 = u - RealScalar(1); - if (um1 == Scalar(-1)) { - return RealScalar(-1); - } - - EIGEN_USING_STD_MATH(log); - return (u - RealScalar(1)) * x / log(u); - } -} - -template -struct expm1_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH - using std::expm1; - #endif - using std_fallback::expm1; - return expm1(x); - } -}; - - -template -struct expm1_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -549,7 +501,7 @@ namespace std_fallback { template struct log1p_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) #if EIGEN_HAS_CXX11_MATH @@ -688,7 +640,7 @@ template struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -890,84 +842,6 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) EIGEN_USING_STD_MATH(max); return max EIGEN_NOT_A_MACRO (x,y); } - - -#elif defined(__SYCL_DEVICE_ONLY__) -template -EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) -{ - - return y < x ? y : x; -} - -template -EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) -{ - - return x < y ? 
y : x; -} - -EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE long mini(const long & x, const long & y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE long maxi(const long & x, const long & y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::max(x,y); -} - - -EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) -{ - return cl::sycl::fmax(x,y); -} - -EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) -{ - return cl::sycl::fmax(x,y); -} - #else template EIGEN_DEVICE_FUNC @@ -1080,11 +954,6 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } -EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -1100,24 +969,10 @@ inline typename internal::pow_impl::result_type pow(const Scala return internal::pow_impl::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } -EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } -EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -1125,11 +980,6 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } -EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC T (floor)(const T& x) @@ -1138,11 +988,6 @@ T 
(floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } -EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) { return ::floorf(x); } @@ -1159,11 +1004,6 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } -EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1204,11 +1044,6 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } -EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { @@ -1216,12 +1051,6 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } -EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1232,11 +1061,19 @@ double log(const double &x) { return ::log(x); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits::Real abs(const T &x) { +typename internal::enable_if::IsSigned || NumTraits::IsComplex,typename NumTraits::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex),typename NumTraits::Real>::type +abs(const T &x) { + return x; +} + #if defined(__SYCL_DEVICE_ONLY__) EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } @@ -1267,11 +1104,6 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); } -EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } @@ -1280,26 +1112,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif -template -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) -{ - return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); -} - -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } -EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - -#ifdef __CUDACC__ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float expm1(const float &x) { return ::expm1f(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double expm1(const double &x) { return ::expm1(x); } -#endif - template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { @@ -1307,11 +1119,6 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cos(float x) { 
return cl::sycl::cos(x); } -EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1327,11 +1134,6 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } -EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1347,11 +1149,6 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); } -EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1367,11 +1164,6 @@ T acos(const T &x) { return acos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } -EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1387,11 +1179,6 @@ T asin(const T &x) { return asin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } -EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1407,11 +1194,6 @@ T atan(const T &x) { return atan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } -EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1428,11 +1210,6 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } -EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1448,11 +1225,6 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); } -EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1468,10 +1240,7 @@ T tanh(const T &x) { return tanh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } -EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } -#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif @@ -1491,11 +1260,6 @@ T fmod(const T& a, const T& b) { 
return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } -EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1638,13 +1402,13 @@ template<> struct random_impl template<> struct scalar_fuzzy_impl { typedef bool RealScalar; - + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1656,10 +1420,10 @@ template<> struct scalar_fuzzy_impl { return (!x) || y; } - + }; - + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index ae1386b..3c9ef22 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -29,7 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f in single-precision. const T plus_9 = pset1(9.f); const T minus_9 = pset1(-9.f); - const T x = pmax(pmin(a_x, plus_9), minus_9); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). const T alpha_1 = pset1(4.89352455891786e-03f); const T alpha_3 = pset1(6.37261928875436e-04f); diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index 200e577..ce41218 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -76,7 +76,6 @@ template class MatrixBase using Base::coeffRef; using Base::lazyAssign; using Base::eval; - using Base::operator-; using Base::operator+=; using Base::operator-=; using Base::operator*=; @@ -123,6 +122,7 @@ template class MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/NestByValue.h b/eigen/Eigen/src/Core/NestByValue.h index 01cf192..13adf07 100644 --- a/eigen/Eigen/src/Core/NestByValue.h +++ b/eigen/Eigen/src/Core/NestByValue.h @@ -67,25 +67,25 @@ template class NestByValue } template - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index row, Index col) const + inline const PacketScalar packet(Index row, Index col) const { return m_expression.template packet(row, col); } template - EIGEN_DEVICE_FUNC inline void writePacket(Index row, Index col, const PacketScalar& x) + inline void writePacket(Index row, Index col, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket(row, col, x); } template - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index index) const + inline const PacketScalar packet(Index index) const { return m_expression.template packet(index); } template - EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& x) + inline void writePacket(Index index, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket(index, x); } @@ -99,7 +99,7 
@@ template class NestByValue /** \returns an expression of the temporary version of *this. */ template -EIGEN_DEVICE_FUNC inline const NestByValue +inline const NestByValue DenseBase::nestByValue() const { return NestByValue(derived()); diff --git a/eigen/Eigen/src/Core/NumTraits.h b/eigen/Eigen/src/Core/NumTraits.h index aebc0c2..daf4898 100644 --- a/eigen/Eigen/src/Core/NumTraits.h +++ b/eigen/Eigen/src/Core/NumTraits.h @@ -71,7 +71,7 @@ struct default_digits10_impl // Integer * and to \c 0 otherwise. * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. - * Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * Stay vague here. No need to do architecture-specific stuff. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. @@ -215,6 +215,8 @@ struct NumTraits > static inline RealScalar epsilon() { return NumTraits::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } + + static inline int digits10() { return NumTraits::digits10(); } }; template<> struct NumTraits diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index 583b7f5..c42725d 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + template struct assignment_from_xpr_op_product { diff --git a/eigen/Eigen/src/Core/Random.h b/eigen/Eigen/src/Core/Random.h index 486e9ed..6faf789 100644 --- a/eigen/Eigen/src/Core/Random.h +++ b/eigen/Eigen/src/Core/Random.h @@ -128,7 +128,7 @@ DenseBase::Random() * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) */ template -EIGEN_DEVICE_FUNC inline Derived& DenseBase::setRandom() +inline Derived& DenseBase::setRandom() { return *this = Random(rows(), cols()); } diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index 2b5b73b..b6e8f88 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template template -EIGEN_DEVICE_FUNC typename internal::traits::Scalar +typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); @@ -422,7 +422,7 @@ DenseBase::redux(const Func& func) const * \warning the result is undefined if \c *this contains NaN. */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { return derived().redux(Eigen::internal::scalar_min_op()); @@ -432,7 +432,7 @@ DenseBase::minCoeff() const * \warning the result is undefined if \c *this contains NaN. 
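
The warnings above note that `minCoeff()`/`maxCoeff()` are undefined when the expression contains NaN. A small guarded usage, relying only on the existing `hasNaN()` member:

    // Guarding a coefficient-wise reduction against the NaN caveat noted above.
    #include <Eigen/Dense>
    #include <iostream>
    #include <limits>

    int main() {
      Eigen::Vector3d v(1.0, std::numeric_limits<double>::quiet_NaN(), 3.0);
      if (v.hasNaN())
        std::cout << "reduction result would be unreliable\n";
      else
        std::cout << "min = " << v.minCoeff() << '\n';
    }
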
*/ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { return derived().redux(Eigen::internal::scalar_max_op()); @@ -445,7 +445,7 @@ DenseBase::maxCoeff() const * \sa trace(), prod(), mean() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -458,7 +458,7 @@ DenseBase::sum() const * \sa trace(), prod(), sum() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { #ifdef __INTEL_COMPILER @@ -479,7 +479,7 @@ DenseBase::mean() const * \sa sum(), mean(), trace() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -494,7 +494,7 @@ DenseBase::prod() const * \sa diagonal(), sum() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar MatrixBase::trace() const { return derived().diagonal().sum(); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index abb1e51..bdf24f5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -184,8 +184,6 @@ protected: * void foo(const Ref >& A) { foo_impl(A); } * \endcode * - * See also the following stackoverflow questions for further references: - * - Correct usage of the Eigen::Ref<> class * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ diff --git a/eigen/Eigen/src/Core/Replicate.h b/eigen/Eigen/src/Core/Replicate.h index 0b2d6d7..9960ef8 100644 --- a/eigen/Eigen/src/Core/Replicate.h +++ b/eigen/Eigen/src/Core/Replicate.h @@ -115,7 +115,7 @@ template class Replicate */ template template -EIGEN_DEVICE_FUNC const Replicate +const Replicate DenseBase::replicate() const { return Replicate(derived()); @@ -130,7 +130,7 @@ DenseBase::replicate() const * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate */ template -EIGEN_DEVICE_FUNC const typename VectorwiseOp::ReplicateReturnType +const typename VectorwiseOp::ReplicateReturnType VectorwiseOp::replicate(Index factor) const { return typename VectorwiseOp::ReplicateReturnType diff --git a/eigen/Eigen/src/Core/ReturnByValue.h b/eigen/Eigen/src/Core/ReturnByValue.h index 11dc86d..c44b767 100644 --- a/eigen/Eigen/src/Core/ReturnByValue.h +++ b/eigen/Eigen/src/Core/ReturnByValue.h @@ -79,7 +79,7 @@ template class ReturnByValue template template -EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const ReturnByValue& other) +Derived& DenseBase::operator=(const ReturnByValue& other) { other.evalTo(derived()); return derived(); diff --git a/eigen/Eigen/src/Core/Reverse.h b/eigen/Eigen/src/Core/Reverse.h index 8b6b3ab..0640cda 100644 --- a/eigen/Eigen/src/Core/Reverse.h +++ b/eigen/Eigen/src/Core/Reverse.h @@ -114,7 +114,7 @@ template class Reverse * */ template -EIGEN_DEVICE_FUNC inline typename DenseBase::ReverseReturnType +inline typename DenseBase::ReverseReturnType DenseBase::reverse() { return ReverseReturnType(derived()); @@ -136,7 +136,7 @@ DenseBase::reverse() * * \sa VectorwiseOp::reverseInPlace(), reverse() */ template -EIGEN_DEVICE_FUNC inline void DenseBase::reverseInPlace() 
+inline void DenseBase::reverseInPlace() { if(cols()>rows()) { @@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl * * \sa DenseBase::reverseInPlace(), reverse() */ template -EIGEN_DEVICE_FUNC void VectorwiseOp::reverseInPlace() +void VectorwiseOp::reverseInPlace() { internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); } diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 7e71fe3..504c98f 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -322,7 +322,7 @@ public: /** This is the const version of MatrixBase::selfadjointView() */ template template -EIGEN_DEVICE_FUNC typename MatrixBase::template ConstSelfAdjointViewReturnType::Type +typename MatrixBase::template ConstSelfAdjointViewReturnType::Type MatrixBase::selfadjointView() const { return typename ConstSelfAdjointViewReturnType::Type(derived()); @@ -339,7 +339,7 @@ MatrixBase::selfadjointView() const */ template template -EIGEN_DEVICE_FUNC typename MatrixBase::template SelfAdjointViewReturnType::Type +typename MatrixBase::template SelfAdjointViewReturnType::Type MatrixBase::selfadjointView() { return typename SelfAdjointViewReturnType::Type(derived()); diff --git a/eigen/Eigen/src/Core/Solve.h b/eigen/Eigen/src/Core/Solve.h index 960a585..a8daea5 100644 --- a/eigen/Eigen/src/Core/Solve.h +++ b/eigen/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template struct s template struct solve_traits { - typedef Matrix PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template diff --git a/eigen/Eigen/src/Core/SolveTriangular.h b/eigen/Eigen/src/Core/SolveTriangular.h index a0011d4..049890b 100644 --- a/eigen/Eigen/src/Core/SolveTriangular.h +++ b/eigen/Eigen/src/Core/SolveTriangular.h @@ -164,7 +164,7 @@ struct triangular_solver_selector { #ifndef EIGEN_PARSED_BY_DOXYGEN template template -EIGEN_DEVICE_FUNC void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const +void TriangularViewImpl::solveInPlace(const MatrixBase& _other) const { OtherDerived& other = _other.const_cast_derived(); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); diff --git a/eigen/Eigen/src/Core/Transpose.h b/eigen/Eigen/src/Core/Transpose.h index ba7d6e6..79b767b 100644 --- a/eigen/Eigen/src/Core/Transpose.h +++ b/eigen/Eigen/src/Core/Transpose.h @@ -168,7 +168,7 @@ template class TransposeImpl * * \sa transposeInPlace(), adjoint() */ template -EIGEN_DEVICE_FUNC inline Transpose +inline Transpose DenseBase::transpose() { return TransposeReturnType(derived()); @@ -180,7 +180,7 @@ DenseBase::transpose() * * \sa transposeInPlace(), adjoint() */ template -EIGEN_DEVICE_FUNC inline typename DenseBase::ConstTransposeReturnType +inline typename DenseBase::ConstTransposeReturnType DenseBase::transpose() const { return ConstTransposeReturnType(derived()); @@ -206,7 +206,7 @@ DenseBase::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template -EIGEN_DEVICE_FUNC inline const typename MatrixBase::AdjointReturnType +inline const typename MatrixBase::AdjointReturnType MatrixBase::adjoint() const { return AdjointReturnType(this->transpose()); @@ -281,7 +281,7 @@ struct inplace_transpose_selector { // non squ * * \sa transpose(), adjoint(), adjointInPlace() */ template -EIGEN_DEVICE_FUNC inline void DenseBase::transposeInPlace() +inline 
void DenseBase::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -312,7 +312,7 @@ EIGEN_DEVICE_FUNC inline void DenseBase::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template -EIGEN_DEVICE_FUNC inline void MatrixBase::adjointInPlace() +inline void MatrixBase::adjointInPlace() { derived() = adjoint().eval(); } diff --git a/eigen/Eigen/src/Core/TriangularMatrix.h b/eigen/Eigen/src/Core/TriangularMatrix.h index ed80da3..667ef09 100644 --- a/eigen/Eigen/src/Core/TriangularMatrix.h +++ b/eigen/Eigen/src/Core/TriangularMatrix.h @@ -488,6 +488,7 @@ template class TriangularViewImpl<_Mat * \sa TriangularView::solveInPlace() */ template + EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval solve(const MatrixBase& other) const; @@ -553,7 +554,7 @@ template class TriangularViewImpl<_Mat // FIXME should we keep that possibility template template -EIGEN_DEVICE_FUNC inline TriangularView& +inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); @@ -563,7 +564,7 @@ TriangularViewImpl::operator=(const MatrixBase template -EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(const MatrixBase& other) +void TriangularViewImpl::lazyAssign(const MatrixBase& other) { internal::call_assignment_no_alias(derived(), other.template triangularView()); } @@ -572,7 +573,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(c template template -EIGEN_DEVICE_FUNC inline TriangularView& +inline TriangularView& TriangularViewImpl::operator=(const TriangularBase& other) { eigen_assert(Mode == int(OtherDerived::Mode)); @@ -582,7 +583,7 @@ TriangularViewImpl::operator=(const TriangularBase template -EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(const TriangularBase& other) +void TriangularViewImpl::lazyAssign(const TriangularBase& other) { eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); @@ -597,7 +598,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(c * If the matrix is triangular, the opposite part is set to zero. */ template template -EIGEN_DEVICE_FUNC void TriangularBase::evalTo(MatrixBase &other) const +void TriangularBase::evalTo(MatrixBase &other) const { evalToLazy(other.derived()); } @@ -623,7 +624,6 @@ EIGEN_DEVICE_FUNC void TriangularBase::evalTo(MatrixBase */ template template -EIGEN_DEVICE_FUNC typename MatrixBase::template TriangularViewReturnType::Type MatrixBase::triangularView() { @@ -633,7 +633,6 @@ MatrixBase::triangularView() /** This is the const version of MatrixBase::triangularView() */ template template -EIGEN_DEVICE_FUNC typename MatrixBase::template ConstTriangularViewReturnType::Type MatrixBase::triangularView() const { @@ -931,7 +930,7 @@ struct triangular_assignment_loop * If the matrix is triangular, the opposite part is set to zero. 
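
`evalTo()`/`evalToLazy()` above implement the assignment of a triangular (or selfadjoint) expression to a dense matrix, with the opposite part set to zero as the comment states. The usual user-facing counterpart is simply:

    // Dense assignment of a triangular view: the strictly upper part of L becomes zero.
    #include <Eigen/Dense>

    int main() {
      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      Eigen::Matrix3d L = A.triangularView<Eigen::Lower>();
      (void)L;
    }
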
*/ template template -EIGEN_DEVICE_FUNC void TriangularBase::evalToLazy(MatrixBase &other) const +void TriangularBase::evalToLazy(MatrixBase &other) const { other.derived().resize(this->rows(), this->cols()); internal::call_triangular_assignment_loop(other.derived(), derived().nestedExpression()); diff --git a/eigen/Eigen/src/Core/VectorwiseOp.h b/eigen/Eigen/src/Core/VectorwiseOp.h index 893bc79..4fe267e 100644 --- a/eigen/Eigen/src/Core/VectorwiseOp.h +++ b/eigen/Eigen/src/Core/VectorwiseOp.h @@ -670,7 +670,7 @@ template class VectorwiseOp * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template -EIGEN_DEVICE_FUNC inline typename DenseBase::ColwiseReturnType +inline typename DenseBase::ColwiseReturnType DenseBase::colwise() { return ColwiseReturnType(derived()); @@ -684,7 +684,7 @@ DenseBase::colwise() * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template -EIGEN_DEVICE_FUNC inline typename DenseBase::RowwiseReturnType +inline typename DenseBase::RowwiseReturnType DenseBase::rowwise() { return RowwiseReturnType(derived()); diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 6362309..195d40f 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -183,22 +183,12 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& } #endif -template<> EIGEN_STRONG_INLINE Packet8f pmin(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_pd(b,a); -} -template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. - return _mm256_max_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. 
- return _mm256_max_pd(b,a); -} +template<> EIGEN_STRONG_INLINE Packet8f pmin(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmin(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); } + +template<> EIGEN_STRONG_INLINE Packet8f pmax(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmax(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8f pround(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } template<> EIGEN_STRONG_INLINE Packet4d pround(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } @@ -235,7 +225,7 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup(const float* from) // Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from)); // tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1); // return _mm256_unpacklo_ps(tmp,tmp); - + // _mm256_insertf128_ps is very slow on Haswell, thus: Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); // mimic an "inplace" permutation of the lower 128bits using a blend diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index 12b8975..f6500a1 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -59,8 +59,8 @@ template<> struct packet_traits : default_packet_traits HasLog = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, + HasSqrt = 1, + HasRsqrt = 1, #endif HasDiv = 1 }; @@ -75,7 +75,7 @@ template<> struct packet_traits : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) - HasSqrt = EIGEN_FAST_MATH, + HasSqrt = 1, HasRsqrt = EIGEN_FAST_MATH, #endif HasDiv = 1 @@ -230,27 +230,23 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, template <> EIGEN_STRONG_INLINE Packet16f pmin(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_ps(b, a); + return _mm512_min_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmin(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_pd(b, a); + return _mm512_min_pd(a, b); } template <> EIGEN_STRONG_INLINE Packet16f pmax(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm512_max_ps(b, a); + return _mm512_max_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmax(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. 
- return _mm512_max_pd(b, a); + return _mm512_max_pd(a, b); } template <> @@ -465,21 +461,53 @@ EIGEN_STRONG_INLINE Packet16i ploadu(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast(from)); - __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); - __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); - return pairs; + Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane0 = _mm256_blend_ps( + lane0, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); + + Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane1 = _mm256_blend_ps( + lane1, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); + +#ifdef EIGEN_VECTORIZE_AVX512DQ + Packet16f res = _mm512_undefined_ps(); + return _mm512_insertf32x8(res, lane0, 0); + return _mm512_insertf32x8(res, lane1, 1); + return res; +#else + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); + return res; +#endif } // Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); - return x; + Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); + lane0 = _mm256_permute_pd(lane0, 3 << 2); + + Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); + lane1 = _mm256_permute_pd(lane1, 3 << 2); + + Packet8d res = _mm512_undefined_pd(); + res = _mm512_insertf64x4(res, lane0, 0); + return _mm512_insertf64x4(res, lane1, 1); } // Loads 4 floats from memory a returns the packet @@ -497,11 +525,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad(const float* from) { // {a0, a0 a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad(const double* from) { - __m128d tmp0 = _mm_load_pd1(from); - __m256d lane0 = _mm256_broadcastsd_pd(tmp0); - __m128d tmp1 = _mm_load_pd1(from + 1); - __m256d lane1 = _mm256_broadcastsd_pd(tmp1); - __m512d tmp = _mm512_undefined_pd(); + Packet8d tmp = _mm512_undefined_pd(); + Packet2d tmp0 = _mm_load_pd1(from); + Packet2d tmp1 = _mm_load_pd1(from + 1); + Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); + Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); tmp = _mm512_insertf64x4(tmp, lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } @@ -632,8 +660,8 
@@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512 #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ - __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ - __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0) __m256 OUTPUT##_1 = \ + _mm512_extractf32x8_ps(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -723,7 +751,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0)); + final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0)); hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0); hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0); @@ -773,7 +801,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); + final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); __m512 final_output; @@ -823,7 +851,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); @@ -839,7 +867,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0); @@ -848,52 +876,55 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp(const Packet8d* vecs) template <> EIGEN_STRONG_INLINE float predux(const Packet16f& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - Packet8f x = _mm256_add_ps(lane0, lane1); - return predux(x); + //#ifdef EIGEN_VECTORIZE_AVX512DQ +#if 0 + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + Packet8f sum = padd(lane0, lane1); + Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1)); + tmp0 = _mm256_hadd_ps(tmp0, tmp0); + return pfirst(_mm256_hadd_ps(tmp0, tmp0)); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3)); sum = _mm_hadd_ps(sum, sum); sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return pfirst(sum); #endif } template <> EIGEN_STRONG_INLINE double predux(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d sum = _mm256_add_pd(lane0, lane1); - 
__m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d sum = padd(lane0, lane1); + Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); + return pfirst(_mm256_hadd_pd(tmp0, tmp0)); } template <> EIGEN_STRONG_INLINE Packet8f predux_downto4(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - return _mm256_add_ps(lane0, lane1); + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + return padd(lane0, lane1); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum0 = _mm_add_ps(lane0, lane2); - __m128 sum1 = _mm_add_ps(lane1, lane3); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum0 = padd(lane0, lane2); + Packet4f sum1 = padd(lane1, lane3); return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1); #endif } template <> EIGEN_STRONG_INLINE Packet4d predux_downto4(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_add_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = padd(lane0, lane1); return res; } @@ -908,59 +939,58 @@ EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) { res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #endif } template <> EIGEN_STRONG_INLINE double predux_mul(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = pmul(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = pmul(lane0, lane1); res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_min(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = 
_mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } template <> EIGEN_STRONG_INLINE double predux_min(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_min_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_min_pd(lane0, lane1); res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_max(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } - template <> EIGEN_STRONG_INLINE double predux_max(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_max_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_max_pd(lane0, lane1); res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 67518da..294c517 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -53,7 +53,7 @@ namespace half_impl { // Make our own __half definition that is similar to CUDA's. 
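Before the half-precision changes below, an editorial aside: the predux/predux_min/predux_max kernels touched above all follow the same lane-extract-then-fold pattern. A minimal sketch of that pattern for a single AVX register (illustrative only, not the Eigen implementation):

  #include <immintrin.h>

  // Horizontal sum of 8 floats: split the 256-bit register into its two 128-bit
  // lanes, add them, then fold the remaining partial sums with horizontal adds.
  static inline float hsum8(__m256 v) {
    __m128 lo  = _mm256_castps256_ps128(v);      // lower lane (no instruction emitted)
    __m128 hi  = _mm256_extractf128_ps(v, 1);    // upper lane
    __m128 sum = _mm_add_ps(lo, hi);             // 4 partial sums
    sum = _mm_hadd_ps(sum, sum);                 // 2 partial sums
    sum = _mm_hadd_ps(sum, sum);                 // full sum in element 0
    return _mm_cvtss_f32(sum);                   // extract element 0
  }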
struct __half { - EIGEN_DEVICE_FUNC __half() : x(0) {} + EIGEN_DEVICE_FUNC __half() {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -386,18 +386,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hexp(a)); -#else - return half(::expf(float(a))); -#endif -} -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { - return half(numext::expm1(float(a))); + return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return half(::hlog(a)); + return Eigen::half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -409,11 +402,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hsqrt(a)); -#else - return half(::sqrtf(float(a))); -#endif + return half(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -431,18 +420,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hfloor(a)); -#else return half(::floorf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hceil(a)); -#else return half(::ceilf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,9 +474,59 @@ template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return 
Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a529..0348b41 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -56,18 +56,6 @@ double2 pexp(const double2& a) return make_double2(exp(a.x), exp(a.y)); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pexpm1(const float4& a) -{ - return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pexpm1(const double2& a) -{ - return make_double2(expm1(a.x), expm1(a.y)); -} - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt(const float4& a) { diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h index 8c46af0..4dda631 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu(const d return make_double2(from[0], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup(const float* from) { +template<> EIGEN_STRONG_INLINE float4 ploaddup(const float* from) { return make_float4(from[0], from[0], from[1], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup(const double* from) { +template<> EIGEN_STRONG_INLINE double2 ploaddup(const double* from) { return make_double2(from[0], from[0]); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index b9a125b..ae54225 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,6 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -276,14 +275,6 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } -template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = expm1f(a1); - float r2 = expm1f(a2); - return __floats2half2_rn(r1, r2); -} - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index aede4a6..836fbc0 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -116,7 +116,7 @@ template<> 
EIGEN_STRONG_INLINE Packet4i pset1(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 03c8a2c..3832de1 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -45,7 +45,7 @@ struct eigen_packet_wrapper m_val = v; return *this; } - + T m_val; }; typedef eigen_packet_wrapper<__m128> Packet4f; @@ -69,7 +69,7 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; }; #define vec2d_swizzle1(v,p,q) \ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2))))) - + #define vec4f_swizzle2(a,b,p,q,r,s) \ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) @@ -190,7 +190,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } #endif - + template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return _mm_add_ps(pset1(a), _mm_set_ps(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return _mm_add_pd(pset1(a),_mm_set_pd(1,0)); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return _mm_add_epi32(pset1(a),_mm_set_epi32(3,2,1,0)); } @@ -250,34 +250,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } #endif -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("minps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm_min_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("minpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. 
- return _mm_min_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -289,34 +263,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const #endif } -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("maxps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("maxpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 diff --git a/eigen/Eigen/src/Core/functors/NullaryFunctors.h b/eigen/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466..b03be02 100644 --- a/eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset(0)), m_flip(numext::abs(high) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? 
m_high : (m_low + RealScalar(i)*m_step); } template @@ -63,7 +63,7 @@ struct linspaced_op_impl // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset(Scalar(i-m_size1)); Packet res = padd(pset1(m_high), pmul(pset1(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl } else { - Packet pi = padd(pset1(Scalar(i)),m_interPacket); + Packet pi = plset(Scalar(i)); Packet res = padd(pset1(m_low), pmul(pset1(m_step), pi)); if(i==m_size1-unpacket_traits::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/eigen/Eigen/src/Core/functors/UnaryFunctors.h b/eigen/Eigen/src/Core/functors/UnaryFunctors.h index bfc0465..2e6a00f 100644 --- a/eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -262,26 +262,6 @@ struct functor_traits > { }; }; -/** \internal - * - * \brief Template functor to compute the exponential of a scalar - 1. - * - * \sa class CwiseUnaryOp, ArrayBase::expm1() - */ -template struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } - template - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } -}; -template -struct functor_traits > { - enum { - PacketAccess = packet_traits::HasExpm1, - Cost = functor_traits >::Cost // TODO measure cost of expm1 - }; -}; - /** \internal * * \brief Template functor to compute the logarithm of a scalar @@ -698,13 +678,7 @@ struct functor_traits > template struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isnan(a); -#else - return (numext::isnan)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); } }; template struct functor_traits > @@ -722,13 +696,7 @@ struct functor_traits > template struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isinf(a); -#else - return (numext::isinf)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); } }; template struct functor_traits > @@ -746,13 +714,7 @@ struct functor_traits > template struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isfinite(a); -#else - return (numext::isfinite)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); } }; template struct functor_traits > diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ad38bcf..e844e37 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ 
b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space internal::general_matrix_matrix_triangular_product + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template template -EIGEN_DEVICE_FUNC TriangularView& TriangularViewImpl::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) +TriangularView& TriangularViewImpl::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - + general_product_to_triangular_selector::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); - + return derived(); } diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15c..41e18ff 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 41d8242..3c1a7fc 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2016 Gael Guennebaud +// Copyright (C) 2008-2009 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,8 +15,10 @@ namespace Eigen { namespace internal { /* Optimized col-major matrix * vector product: - * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * This algorithm processes 4 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 4 and to reduce + * the instruction dependency. Moreover, we know that all bands have the + * same alignment pattern. 
* * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments @@ -25,7 +27,33 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul * + * Accesses to the matrix coefficients follow the following logic: + * + * - if all columns have the same alignment then + * - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise + * - if even columns have the same alignment then + * // odd columns are guaranteed to have the same alignment too + * - if even or odd columns have the same alignment as the result, then + * // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double) + * - perform half aligned and half unaligned loads (-> EvenAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise, if the register size is 4 scalars (e.g., SSE with float) then + * - one over 4 consecutive columns is guaranteed to be aligned with the result vector, + * perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case) + * // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h + * - otherwise, + * // if we get here, this means the register size is greater than 4 (e.g., AVX with floats), + * // we currently fall back to the NoneAligned case + * * The same reasoning apply for the transposed case. + * + * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet... + * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment + * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow + * compared to unaligned loads on a 4 byte boundary. + * */ template struct general_matrix_vector_product @@ -59,145 +87,238 @@ EIGEN_DONT_INLINE static void run( template EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, RhsScalar alpha) { EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr==1); - - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. 
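The kernel comment above describes processing four columns at once to cut the loads and stores of the result vector by a factor of four and to break instruction dependencies. A plain scalar sketch of that strategy (editorial example only, not the actual Eigen kernel):

  // y += alpha * A * x for a column-major A with leading dimension lda.
  void gemv_colmajor_4cols(const double* A, int lda, const double* x,
                           double* y, int rows, int cols, double alpha) {
    int j = 0;
    for (; j + 4 <= cols; j += 4) {
      const double b0 = alpha * x[j],     b1 = alpha * x[j + 1];
      const double b2 = alpha * x[j + 2], b3 = alpha * x[j + 3];
      // One read-modify-write of y[i] per group of 4 columns instead of per column.
      for (int i = 0; i < rows; ++i)
        y[i] += A[i + (j    ) * lda] * b0 + A[i + (j + 1) * lda] * b1
              + A[i + (j + 2) * lda] * b2 + A[i + (j + 3) * lda] * b3;
    }
    for (; j < cols; ++j) {                // leftover columns, one at a time
      const double b = alpha * x[j];
      for (int i = 0; i < rows; ++i) y[i] += A[i + j * lda] * b;
    }
  }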
- LhsMapper lhs(alhs); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \ + pstore(&res[j], \ + padd(pload(&res[j]), \ + padd( \ + padd(pcj.pmul(lhs0.template load(j), ptmp0), \ + pcj.pmul(lhs1.template load(j), ptmp1)), \ + padd(pcj.pmul(lhs2.template load(j), ptmp2), \ + pcj.pmul(lhs3.template load(j), ptmp3)) ))) + + typedef typename LhsMapper::VectorMapper LhsScalars; conj_helper cj; conj_helper pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + const Index lhsStride = lhs.stride(); - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; - const Index n8 = rows-8*ResPacketSize+1; - const Index n4 = rows-4*ResPacketSize+1; - const Index n3 = rows-3*ResPacketSize+1; - const Index n2 = rows-2*ResPacketSize+1; - const Index n1 = rows-1*ResPacketSize+1; + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_default_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; - // TODO: improve the following heuristic: - const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4); - ResPacket palpha = pset1(alpha); + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(size); - for(Index j2=0; j2 4) + { + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. 
+ // Currently, it seems to be better to perform unaligned loads anyway + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)), - c4 = pset1(ResScalar(0)), - c5 = pset1(ResScalar(0)), - c6 = pset1(ResScalar(0)), - c7 = pset1(ResScalar(0)); - - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+LhsPacketSize*3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load(i+LhsPacketSize*4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load(i+LhsPacketSize*5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load(i+LhsPacketSize*6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load(i+LhsPacketSize*7,j),b0,c7); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu(res+i+ResPacketSize*3))); - pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu(res+i+ResPacketSize*4))); - pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu(res+i+ResPacketSize*5))); - pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu(res+i+ResPacketSize*6))); - pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu(res+i+ResPacketSize*7))); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipColumns = 0; } - if(i(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)); + skipColumns = (std::min)(skipColumns,cols); + // note that the skiped columns are processed later. 
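The code above classifies columns into the AllAligned/EvenAligned/FirstAligned/NoneAligned cases and computes how many result coefficients and columns to skip before packet loads can be aligned. A small sketch of the basic bookkeeping, assuming a simplified contract for the first-aligned helper (editorial example; Eigen's internal::first_default_aligned is more general):

  #include <cstddef>
  #include <cstdint>

  // How many leading floats must be handled with scalar code before `p` reaches
  // a 16-byte boundary (4-float SSE packets). Returns `size` if a packet-aligned
  // position can never be reached; this mirrors one common convention only.
  inline std::ptrdiff_t first_aligned16(const float* p, std::ptrdiff_t size) {
    const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(p);
    if (addr % sizeof(float) != 0)
      return size;                                    // scalar itself misaligned: give up
    const std::uintptr_t rem = addr % 16;
    const std::ptrdiff_t skip =
        static_cast<std::ptrdiff_t>(((16 - rem) % 16) / sizeof(float));
    return skip < size ? skip : size;
  }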
+ } - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+LhsPacketSize*3,j),b0,c3); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu(res+i+ResPacketSize*3))); + /* eigen_internal_assert( (alignmentPattern==NoneAligned) + || (skipColumns + columnsAtOnce >= cols) + || LhsPacketSize > size + || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/ + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } - i+=ResPacketSize*4; - } - if(i(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - for(Index j=j2; j(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+LhsPacketSize*2,j),b0,c2); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu(res+i+ResPacketSize*2))); + Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; + for (Index i=skipColumns; i(alpha*rhs(i, 0)), + ptmp1 = pset1(alpha*rhs(i+offset1, 0)), + ptmp2 = pset1(alpha*rhs(i+2, 0)), + ptmp3 = pset1(alpha*rhs(i+offset3, 0)); - i+=ResPacketSize*3; - } - if(i(ResScalar(0)), - c1 = pset1(ResScalar(0)); + /* explicit vectorization */ + // process initial unaligned coeffs + for (Index j=0; jalignedStart) { - RhsPacket b0 = pset1(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+LhsPacketSize*1,j),b0,c1); + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = lhs1.template load(alignedStart-1); + A02 = lhs2.template load(alignedStart-2); + A03 = lhs3.template load(alignedStart-3); + + for (; j(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load(j-3+LhsPacketSize); palign<3>(A03,A13); + + A00 = lhs0.template load(j); + A10 = lhs0.template load(j+LhsPacketSize); + T0 = pcj.pmadd(A00, ptmp0, pload(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = lhs1.template load(j-1+2*LhsPacketSize); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = lhs2.template load(j-2+2*LhsPacketSize); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, T0); + pstore(&res[j],T0); + A03 = lhs3.template load(j-3+2*LhsPacketSize); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu(res+i+ResPacketSize*1))); - i+=ResPacketSize*2; + } // end explicit 
vectorization + + /* process remaining coeffs (or all if there is no explicit vectorization) */ + for (Index j=alignedSize; j(ResScalar(0)); - for(Index j=j2; j(alpha*rhs(k, 0)); + const LhsScalars lhs0 = lhs.getVectorMapper(0, k); + + if (Vectorizable) { - RhsPacket b0 = pset1(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j(alignedStart)) + for (Index i = alignedStart;i(i), ptmp0, pload(&res[i]))); + else + for (Index i = alignedStart;i(i), ptmp0, pload(&res[i]))); } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu(res+i+ResPacketSize*0))); - i+=ResPacketSize; + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i EIGEN_DONT_INLINE void general_matrix_vector_product::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, ResScalar alpha) { - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); - eigen_internal_assert(rhs.stride()==1); + + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\ + RhsPacket b = rhs.getVectorMapper(j, 0).template load(0); \ + ptmp0 = pcj.pmadd(lhs0.template load(j), b, ptmp0); \ + ptmp1 = pcj.pmadd(lhs1.template load(j), b, ptmp1); \ + ptmp2 = pcj.pmadd(lhs2.template load(j), b, ptmp2); \ + ptmp3 = pcj.pmadd(lhs3.template load(j), b, ptmp3); } + conj_helper cj; conj_helper pcj; - // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, - // processing 8 rows at once might be counter productive wrt cache. - const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7; - const Index n4 = rows-3; - const Index n2 = rows-1; + typedef typename LhsMapper::VectorMapper LhsScalars; - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index depth = cols; + const Index lhsStride = lhs.stride(); - Index i=0; - for(; i1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(depth); + const Index rhsAlignmentOffset = rhs.firstAligned(rows); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. 
for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || + (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) || + (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) ) { - ResPacket c0 = pset1(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)), - c4 = pset1(ResScalar(0)), - c5 = pset1(ResScalar(0)), - c6 = pset1(ResScalar(0)), - c7 = pset1(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load(j,0); - - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load(i+4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load(i+5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load(i+6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load(i+7,j),b0,c7); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - ResScalar cc4 = predux(c4); - ResScalar cc5 = predux(c5); - ResScalar cc6 = predux(c6); - ResScalar cc7 = predux(c7); - for(; j 4) { - ResPacket c0 = pset1(ResScalar(0)), - c1 = pset1(ResScalar(0)), - c2 = pset1(ResScalar(0)), - c3 = pset1(ResScalar(0)); + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth(j,0); - - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load(i+3,j),b0,c3); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipRows = 0; } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - for(; j= rows) + || LhsPacketSize > depth + || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/ } - for(; i(ResScalar(0)), - c1 = pset1(ResScalar(0)); + alignedStart = 0; + alignedSize = depth; + alignmentPattern = AllAligned; + } - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load(j,0); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - c0 = pcj.pmadd(lhs.template load(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load(i+1,j),b0,c1); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - for(; j(ResScalar(0)), ptmp1 = pset1(ResScalar(0)), + ptmp2 = pset1(ResScalar(0)), ptmp3 = pset1(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j1) + { + /* Here we proccess 4 rows with with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlaping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. 
+ */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = lhs1.template load(alignedStart-1); + A02 = lhs2.template load(alignedStart-2); + A03 = lhs3.template load(alignedStart-3); + + for (; j(0); + A11 = lhs1.template load(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load(j-3+LhsPacketSize); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(lhs0.template load(j), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = lhs1.template load(j-1+2*LhsPacketSize); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = lhs2.template load(j-2+2*LhsPacketSize); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = lhs3.template load(j-3+2*LhsPacketSize); palign<3>(A13,A03); + + b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load(0); + ptmp0 = pcj.pmadd(lhs0.template load(j+LhsPacketSize), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index i=start; i(j,0); - c0 = pcj.pmadd(lhs.template load(i,j),b0,c0); + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResPacket ptmp0 = pset1(tmp0); + const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); + // process first unaligned result's coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; jalignedStart) + { + // process aligned rhs coeffs + if (lhs0.template aligned(alignedStart)) + for (Index j = alignedStart;j(j), rhs.getVectorMapper(j, 0).template load(0), ptmp0); + else + for (Index j = alignedStart;j(j), rhs.getVectorMapper(j, 0).template load(0), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop get vectorized by the compiler ! 
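The peeled loops above rebuild unaligned lhs packets from the two aligned packets that overlap them (the palign<Offset> calls), as the comment on shift/move operations explains. A minimal SSSE3 sketch of that re-alignment trick for a one-float offset (editorial example; the real code templates the offset and scalar type):

  #include <tmmintrin.h>   // SSSE3 palignr

  // Load the 4 floats starting at base+1, where `base` is 16-byte aligned,
  // using two aligned loads plus a byte shift instead of one unaligned load.
  static inline __m128 load_at_offset1(const float* base) {
    const __m128i* p = reinterpret_cast<const __m128i*>(base);
    __m128i lo = _mm_load_si128(p);        // floats 0..3 (aligned)
    __m128i hi = _mm_load_si128(p + 1);    // floats 4..7 (aligned)
    // Concatenate hi:lo and shift right by 4 bytes -> floats 1..4.
    return _mm_castsi128_ps(_mm_alignr_epi8(hi, lo, 4));
  }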
+ for (Index j=alignedSize; j template template -EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView +SelfAdjointView& SelfAdjointView ::rankUpdate(const MatrixBase& u, const Scalar& alpha) { selfadjoint_product_selector::run(_expression().const_cast_derived(), u.derived(), alpha); diff --git a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h index d395888..2ae3641 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -57,7 +57,7 @@ template struct conj_expr_if template template -EIGEN_DEVICE_FUNC SelfAdjointView& SelfAdjointView +SelfAdjointView& SelfAdjointView ::rankUpdate(const MatrixBase& u, const MatrixBase& v, const Scalar& alpha) { typedef internal::blas_traits UBlasTraits; diff --git a/eigen/Eigen/src/Core/util/BlasUtil.h b/eigen/Eigen/src/Core/util/BlasUtil.h index b1791fb..6e6ee11 100644 --- a/eigen/Eigen/src/Core/util/BlasUtil.h +++ b/eigen/Eigen/src/Core/util/BlasUtil.h @@ -222,11 +222,6 @@ class blas_data_mapper { return ploadt(&operator()(i, j)); } - template - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { - return ploadt(&operator()(i, j)); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { return ploadt(&operator()(i, j)); } diff --git a/eigen/Eigen/src/Core/util/Constants.h b/eigen/Eigen/src/Core/util/Constants.h index 5d37e5d..7587d68 100644 --- a/eigen/Eigen/src/Core/util/Constants.h +++ b/eigen/Eigen/src/Core/util/Constants.h @@ -25,10 +25,6 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; -/** This value means that the increment to go from one value to another in a sequence is not constant for each step. - */ -const int UndefinedIncr = 0xfffffe; - /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm(). * The value Infinity there means the L-infinity norm. */ diff --git a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h index 4431f2f..7559e12 100644 --- a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,6 +4,7 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. 
in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,9 +42,6 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/eigen/Eigen/src/Core/util/ForwardDeclarations.h index 1a48cff..ea10739 100644 --- a/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -83,7 +83,6 @@ template class ForceAlignedAccess; template class SwapWrapper; template class Block; -template class IndexedView; template class VectorBlock; template class Transpose; diff --git a/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/eigen/Eigen/src/Core/util/IndexedViewHelper.h deleted file mode 100644 index ab01c85..0000000 --- a/eigen/Eigen/src/Core/util/IndexedViewHelper.h +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INDEXED_VIEW_HELPER_H -#define EIGEN_INDEXED_VIEW_HELPER_H - -namespace Eigen { - -/** \namespace Eigen::placeholders - * \ingroup Core_Module - * - * Namespace containing symbolic placeholder and identifiers - */ -namespace placeholders { - -namespace internal { -struct symbolic_last_tag {}; -} - -/** \var last - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder support standard arithmetic operation. - * - * A typical usage example would be: - * \code - * using namespace Eigen; - * using Eigen::placeholders::last; - * VectorXd v(n); - * v(seq(2,last-2)).setOnes(); - * \endcode - * - * \sa end - */ -static const Symbolic::SymbolExpr last; - -/** \var end - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last+1 element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder support standard arithmetic operation. 
- * It is essentially an alias to last+1 - * - * \sa last - */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto end = last+1; -#else -// Using a FixedExpr<1> expression is important here to make sure the compiler -// can fully optimize the computation starting indices with zero overhead. -static const Symbolic::AddExpr,Symbolic::ValueExpr > > end(last+fix<1>()); -#endif - -} // end namespace placeholders - -namespace internal { - - // Replace symbolic last/end "keywords" by their true runtime value -inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } - -template -FixedInt eval_expr_given_size(FixedInt x, Index /*size*/) { return x; } - -template -Index eval_expr_given_size(const Symbolic::BaseExpr &x, Index size) -{ - return x.derived().eval(placeholders::last=size-1); -} - -// Extract increment/step at compile time -template struct get_compile_time_incr { - enum { value = UndefinedIncr }; -}; - -// Analogue of std::get<0>(x), but tailored for our needs. -template -Index first(const T& x) { return x.first(); } - -// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice -// The generic implementation is a no-op -template -struct IndexedViewCompatibleType { - typedef T type; -}; - -template -const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } - -//-------------------------------------------------------------------------------- -// Handling of a single Index -//-------------------------------------------------------------------------------- - -struct SingleRange { - enum { - SizeAtCompileTime = 1 - }; - SingleRange(Index val) : m_value(val) {} - Index operator[](Index) const { return m_value; } - Index size() const { return 1; } - Index first() const { return m_value; } - Index m_value; -}; - -template<> struct get_compile_time_incr { - enum { value = 1 }; // 1 or 0 ?? 
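// Illustrative sketch (stand-alone analogue of the SingleRange helper deleted above;
// the *Sketch name is made up): it wraps a single Index so that it can be used where a
// range-like object exposing size(), operator[] and first() is expected.
#include <Eigen/Core>
struct SingleRangeSketch
{
  explicit SingleRangeSketch(Eigen::Index val) : m_value(val) {}
  Eigen::Index operator[](Eigen::Index) const { return m_value; }
  Eigen::Index size()  const { return 1; }
  Eigen::Index first() const { return m_value; }
  Eigen::Index m_value;
};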
-}; - -// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operatro[](int) methods) -template -struct IndexedViewCompatibleType::value>::type> { - // Here we could simply use Array, but maybe it's less work for the compiler to use - // a simpler wrapper as SingleRange - //typedef Eigen::Array type; - typedef SingleRange type; -}; - -template -struct IndexedViewCompatibleType::value>::type> { - typedef SingleRange type; -}; - - -template -typename enable_if::value,SingleRange>::type -makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { - return eval_expr_given_size(id,size); -} - -//-------------------------------------------------------------------------------- -// Handling of all -//-------------------------------------------------------------------------------- - -struct all_t { all_t() {} }; - -// Convert a symbolic 'all' into a usable range type -template -struct AllRange { - enum { SizeAtCompileTime = XprSize }; - AllRange(Index size = XprSize) : m_size(size) {} - Index operator[](Index i) const { return i; } - Index size() const { return m_size.value(); } - Index first() const { return 0; } - variable_if_dynamic m_size; -}; - -template -struct IndexedViewCompatibleType { - typedef AllRange type; -}; - -template -inline AllRange::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { - return AllRange::value>(size); -} - -template struct get_compile_time_incr > { - enum { value = 1 }; -}; - -} // end namespace internal - - -namespace placeholders { - -/** \var all - * \ingroup Core_Module - * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns - */ -static const Eigen::internal::all_t all; - -} - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/eigen/Eigen/src/Core/util/IntegralConstant.h b/eigen/Eigen/src/Core/util/IntegralConstant.h deleted file mode 100644 index 78a4705..0000000 --- a/eigen/Eigen/src/Core/util/IntegralConstant.h +++ /dev/null @@ -1,270 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INTEGRAL_CONSTANT_H -#define EIGEN_INTEGRAL_CONSTANT_H - -namespace Eigen { - -namespace internal { - -template class FixedInt; -template class VariableAndFixedInt; - -/** \internal - * \class FixedInt - * - * This class embeds a compile-time integer \c N. - * - * It is similar to c++11 std::integral_constant but with some additional features - * such as: - * - implicit conversion to int - * - arithmetic and some bitwise operators: -, +, *, /, %, &, | - * - c++98/14 compatibility with fix and fix() syntax to define integral constants. - * - * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to - * be created by the user using Eigen::fix or Eigen::fix(). In C++98-11, the former syntax does - * not create a FixedInt instance but rather a point to function that needs to be \em cleaned-up - * using the generic helper: - * \code - * internal::cleanup_index_type::type - * internal::cleanup_index_type::type - * \endcode - * where T can a FixedInt, a pointer to function FixedInt (*)(), or numerous other integer-like representations. 
- * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: - * \code - * internal::get_fixed_value::value - * \endcode - * that will give you \c N if T equals FixedInt or FixedInt (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix, class VariableAndFixedInt - */ -template class FixedInt -{ -public: - static const int value = N; - operator int() const { return value; } - FixedInt() {} - FixedInt( VariableAndFixedInt other) { - EIGEN_ONLY_USED_FOR_DEBUG(other); - eigen_internal_assert(int(other)==N); - } - - FixedInt<-N> operator-() const { return FixedInt<-N>(); } - template - FixedInt operator+( FixedInt) const { return FixedInt(); } - template - FixedInt operator-( FixedInt) const { return FixedInt(); } - template - FixedInt operator*( FixedInt) const { return FixedInt(); } - template - FixedInt operator/( FixedInt) const { return FixedInt(); } - template - FixedInt operator%( FixedInt) const { return FixedInt(); } - template - FixedInt operator|( FixedInt) const { return FixedInt(); } - template - FixedInt operator&( FixedInt) const { return FixedInt(); } - -#if EIGEN_HAS_CXX14 - // Needed in C++14 to allow fix(): - FixedInt operator() () const { return *this; } - - VariableAndFixedInt operator() (int val) const { return VariableAndFixedInt(val); } -#else - FixedInt ( FixedInt (*)() ) {} -#endif - -#if EIGEN_HAS_CXX11 - FixedInt(std::integral_constant) {} -#endif -}; - -/** \internal - * \class VariableAndFixedInt - * - * This class embeds both a compile-time integer \c N and a runtime integer. - * Both values are supposed to be equal unless the compile-time value \c N has a special - * value meaning that the runtime-value should be used. Depending on the context, this special - * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for - * quantities that can be negative). - * - * It is the return-type of the function Eigen::fix(int), and most of the time this is the only - * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. - * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert - * it to either a true compile-time quantity (i.e. a FixedInt), or to a runtime quantity (e.g., an Index) - * using the following generic helper: - * \code - * internal::cleanup_index_type::type - * internal::cleanup_index_type::type - * \endcode - * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can also extract the compile-time value \c N using the following helper: - * \code - * internal::get_fixed_value::value - * \endcode - * that will give you \c N if T equals VariableAndFixedInt, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). 
- * - * \sa fix(int), class FixedInt - */ -template class VariableAndFixedInt -{ -public: - static const int value = N; - operator int() const { return m_value; } - VariableAndFixedInt(int val) { m_value = val; } -protected: - int m_value; -}; - -template struct get_fixed_value { - static const int value = Default; -}; - -template struct get_fixed_value,Default> { - static const int value = N; -}; - -#if !EIGEN_HAS_CXX14 -template struct get_fixed_value (*)(),Default> { - static const int value = N; -}; -#endif - -template struct get_fixed_value,Default> { - static const int value = N ; -}; - -template -struct get_fixed_value,Default> { - static const int value = N; -}; - -template EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } -#if !EIGEN_HAS_CXX14 -template EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt (*)()) { return N; } -#endif - -// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: - -// By default, no cleanup: -template struct cleanup_index_type { typedef T type; }; - -// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index -template struct cleanup_index_type::value>::type> { typedef Index type; }; - -#if !EIGEN_HAS_CXX14 -// In c++98/c++11, fix is a pointer to function that we better cleanup to a true FixedInt: -template struct cleanup_index_type (*)(), DynamicKey> { typedef FixedInt type; }; -#endif - -// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: -template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; -// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): -template struct cleanup_index_type, DynamicKey> { typedef Index type; }; - -#if EIGEN_HAS_CXX11 -template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; -#endif - -} // end namespace internal - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX14 -template -static const internal::FixedInt fix{}; -#else -template -inline internal::FixedInt fix() { return internal::FixedInt(); } - -// The generic typename T is mandatory. Otherwise, a code like fix could refer to either the function above or this next overload. -// This way a code like fix can only refer to the previous function. -template -inline internal::VariableAndFixedInt fix(T val) { return internal::VariableAndFixedInt(val); } -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** \var fix() - * \ingroup Core_Module - * - * This \em identifier permits to construct an object embedding a compile-time integer \c N. - * - * \tparam N the compile-time integer value - * - * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: - * \code - * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] - * \endcode - * - * See also the function fix(int) to pass both a compile-time and runtime value. - * - * In c++14, it is implemented as: - * \code - * template static const internal::FixedInt fix{}; - * \endcode - * where internal::FixedInt is an internal template class similar to - * \c std::integral_constant - * Here, \c fix is thus an object of type \c internal::FixedInt. - * - * In c++98/11, it is implemented as a function: - * \code - * template inline internal::FixedInt fix(); - * \endcode - * Here internal::FixedInt is thus a pointer to function. - * - * If for some reason you want a true object in c++98 then you can write: \code fix() \endcode which is also valid in c++14. 
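// Illustrative sketch (relies on the Eigen::fix / Eigen::seqN API documented in the
// deleted block above, which this patch removes): the compile-time sequence
// seqN(10, fix<4>, fix<-3>) is [10 7 4 1], as stated in the removed docs.
#include <Eigen/Dense>
inline void fix_sketch()
{
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(20, 0.0, 19.0);
  Eigen::VectorXd w = v(Eigen::seqN(10, Eigen::fix<4>, Eigen::fix<-3>)); // 10, 7, 4, 1
}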
- * - * \sa fix(int), seq, seqN - */ -template -static const auto fix(); - -/** \fn fix(int) - * \ingroup Core_Module - * - * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val. - * - * \tparam N the compile-time integer value - * \param val the fallback runtime integer value - * - * This function is a more general version of the \ref fix identifier/function that can be used in template code - * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers - * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers - * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val - * will be used as a fallback. - * - * A typical use case would be: - * \code - * template void foo(const MatrixBase &mat) { - * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; - * const int n = mat.rows()/2; - * ... mat( seqN(0,fix(n) ) ...; - * } - * \endcode - * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. - * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dissmissed, and converted to an Eigen::Index of value \c n. - * Otherwise, the runtime-value \c n will be dissmissed, and the returned ArithmeticSequence will be of the exact same type as seqN(0,fix) . - * - * \sa fix, seqN, class ArithmeticSequence - */ -template -static const auto fix(int val); - -#endif // EIGEN_PARSED_BY_DOXYGEN - -} // end namespace Eigen - -#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 14ec87d..38d6ddb 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 90 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -23,7 +23,7 @@ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) + #define EIGEN_COMP_GNUC 1 #else #define EIGEN_COMP_GNUC 0 #endif @@ -349,14 +349,6 @@ # define __has_feature(x) 0 #endif -// Some old compilers do not support template specializations like: -// template void foo(const T x[N]); -#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 -#else -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 -#endif - // Upperbound on the C++ version to use. // Expected values are 03, 11, 14, 17, etc. // By default, let's use an arbitrarily large C++ version. @@ -370,11 +362,6 @@ #define EIGEN_HAS_CXX11 0 #endif -#if EIGEN_MAX_CPP_VER>=14 && (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910) -#define EIGEN_HAS_CXX14 1 -#else -#define EIGEN_HAS_CXX14 0 -#endif // Do we support r-value references? 
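// Illustrative sketch (standard Eigen macro, not specific to this patch): client code
// usually checks the version numbers changed above through EIGEN_VERSION_AT_LEAST.
#include <Eigen/Core>
#if EIGEN_VERSION_AT_LEAST(3, 3, 0)
typedef Eigen::Matrix<float, 3, 3> Mat3fSketch;   // only compiled on Eigen >= 3.3
#endif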
#ifndef EIGEN_HAS_RVALUE_REFERENCES @@ -393,8 +380,7 @@ #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -413,12 +399,10 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -427,14 +411,13 @@ // Does the compiler fully support const expressions? (as in c++14) #ifndef EIGEN_HAS_CONSTEXPR -#if defined(__CUDACC__) +#ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ - (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif @@ -542,8 +525,8 @@ // - static is not very good because it prevents definitions from different object files to be merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. -#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -641,14 +624,6 @@ namespace Eigen { #endif -#if EIGEN_COMP_MSVC - // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. - // This workaround is ugly, but it does the job. 
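// Illustrative sketch (reconstructed from the deleted lines that follow; the *_SKETCH
// name is made up): the removed macro silenced MSVC's C4127 ("conditional expression
// is constant") by prepending a no-op comma operand.  Call sites such as
// if(EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) in Transform.h revert to
// plain constant conditions; warning 4127 is instead disabled globally in
// DisableStupidWarnings.h.
#if defined(_MSC_VER)
# define EIGEN_CONST_CONDITIONAL_SKETCH(cond) (void)0, cond
#else
# define EIGEN_CONST_CONDITIONAL_SKETCH(cond) cond
#endif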
-# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond -#else -# define EIGEN_CONST_CONDITIONAL(cond) cond -#endif - //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // @@ -878,8 +853,7 @@ namespace Eigen { typedef typename Eigen::internal::ref_selector::type Nested; \ typedef typename Eigen::internal::traits::StorageKind StorageKind; \ typedef typename Eigen::internal::traits::StorageIndex StorageIndex; \ - enum CompileTimeTraits \ - { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ + enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ Flags = Eigen::internal::traits::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index 7d90534..c634d7e 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -63,7 +63,7 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS @@ -114,7 +114,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast(aligned) - 1) = original; return aligned; } @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -471,8 +471,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size - */ -template + */ +template inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -502,7 +502,7 @@ template struct smart_copy_helper { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template struct smart_memmove_helper; template void smart_memmove(const T* start, const T* end, T* target) @@ -522,15 +522,15 @@ template struct smart_memmove_helper { template struct smart_memmove_helper { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; @@ -603,7 +603,7 @@ template void swap(scoped_array &a,scoped_array &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal @@ -622,7 +622,7 @@ template void swap(scoped_array &a,scoped_array &b) * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. 
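// Illustrative sketch (mirrors the first_multiple() helper shown above; the *_sketch
// name is made up): rounding a size up to the next multiple of 'base' is the same
// idiom the EIGEN_ALLOCA path relies on when it manually re-aligns the stack buffer.
#include <Eigen/Core>
inline Eigen::Index round_up_sketch(Eigen::Index size, Eigen::Index base)
{
  return ((size + base - 1) / base) * base;   // e.g. round_up_sketch(13, 16) == 16
}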
@@ -645,7 +645,7 @@ template void swap(scoped_array &a,scoped_array &b) Eigen::internal::check_size_for_overflow(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) - + #endif @@ -701,7 +701,7 @@ template void swap(scoped_array &a,scoped_array &b) * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less, +* std::map< int, Matrix4f, std::less, * aligned_allocator > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 8de6055..7f63707 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -97,22 +97,17 @@ template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; -#if EIGEN_HAS_CXX11 -using std::is_integral; -#else -template struct is_integral { enum { value = false }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -template<> struct is_integral { enum { value = true }; }; -#endif - +template struct is_integral { enum { value = false }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; @@ -283,59 +278,6 @@ protected: EIGEN_DEVICE_FUNC ~noncopyable() {} }; -/** \internal - * Provides access to the number of elements in the object of as a compile-time constant expression. - * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). - * - * Similar to std::tuple_size, but more general. - * - * It currently supports: - * - any types T defining T::SizeAtCompileTime - * - plain C arrays as T[N] - * - std::array (c++11) - * - some internal types such as SingleRange and AllRange - * - * The second template parameter eases SFINAE-based specializations. 
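// Illustrative sketch (stand-alone analogue of the array_size helper documented above,
// whose removal continues below; the *_sketch name is made up): it reports a
// compile-time size, or Eigen::Dynamic when the size cannot be resolved.
#include <array>
#include <Eigen/Core>
template<typename T, typename Enable = void> struct array_size_sketch
{
  enum { value = Eigen::Dynamic };                       // unknown at compile time
};
template<typename T, std::size_t N> struct array_size_sketch<T[N]>
{
  enum { value = int(N) };                               // plain C array
};
template<typename T, std::size_t N> struct array_size_sketch<std::array<T, N> >
{
  enum { value = int(N) };                               // std::array (C++11)
};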
- */ -template struct array_size { - enum { value = Dynamic }; -}; - -template struct array_size::type> { - enum { value = T::SizeAtCompileTime }; -}; - -template struct array_size { - enum { value = N }; -}; -template struct array_size { - enum { value = N }; -}; - -#if EIGEN_HAS_CXX11 -template struct array_size > { - enum { value = N }; -}; -template struct array_size > { - enum { value = N }; -}; -#endif - -/** \internal - * Analogue of the std::size free function. - * It returns the size of the container or view \a x of type \c T - * - * It currently supports: - * - any types T defining a member T::size() const - * - plain C arrays as T[N] - * - */ -template -Index size(const T& x) { return x.size(); } - -template -Index size(const T (&) [N]) { return N; } - /** \internal * Convenient struct to get the result type of a unary or binary functor. * @@ -433,10 +375,10 @@ struct meta_no { char a[2]; }; template struct has_ReturnType { - template static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); - template static meta_no testFunctor(...); + template static meta_yes testFunctor(typename C::ReturnType const *); + template static meta_no testFunctor(...); - enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; + enum { value = sizeof(testFunctor(0)) == sizeof(meta_yes) }; }; template const T* return_ptr(); diff --git a/eigen/Eigen/src/Core/util/SymbolicIndex.h b/eigen/Eigen/src/Core/util/SymbolicIndex.h deleted file mode 100644 index bb6349e..0000000 --- a/eigen/Eigen/src/Core/util/SymbolicIndex.h +++ /dev/null @@ -1,300 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SYMBOLIC_INDEX_H -#define EIGEN_SYMBOLIC_INDEX_H - -namespace Eigen { - -/** \namespace Eigen::Symbolic - * \ingroup Core_Module - * - * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. - * Here is a simple example: - * - * \code - * // First step, defines symbols: - * struct x_tag {}; static const Symbolic::SymbolExpr x; - * struct y_tag {}; static const Symbolic::SymbolExpr y; - * struct z_tag {}; static const Symbolic::SymbolExpr z; - * - * // Defines an expression: - * auto expr = (x+3)/y+z; - * - * // And evaluate it: (c++14) - * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; - * - * // In c++98/11, only one symbol per expression is supported for now: - * auto expr98 = (3-x)/2; - * std::cout << expr98.eval(x=6) << "\n"; - * \endcode - * - * It is currently only used internally to define and minipulate the placeholders::last and placeholders::end symbols in Eigen::seq and Eigen::seqN. - * - */ -namespace Symbolic { - -template class Symbol; -template class NegateExpr; -template class AddExpr; -template class ProductExpr; -template class QuotientExpr; - -// A simple wrapper around an integral value to provide the eval method. -// We could also use a free-function symbolic_eval... 
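// Illustrative sketch (reconstructed from the example in the deleted comment block
// above; it relies on the Eigen::Symbolic namespace this patch removes and needs
// C++14 for multi-symbol eval()):
#include <iostream>
#include <Eigen/Core>
struct x_tag {}; struct y_tag {}; struct z_tag {};
inline void symbolic_sketch()
{
  static const Eigen::Symbolic::SymbolExpr<x_tag> x;
  static const Eigen::Symbolic::SymbolExpr<y_tag> y;
  static const Eigen::Symbolic::SymbolExpr<z_tag> z;
  auto expr = (x + 3) / y + z;                             // symbolic Index expression
  std::cout << expr.eval(x = 6, y = 3, z = -13) << "\n";   // (6+3)/3 + (-13) == -10
}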
-template -class ValueExpr { -public: - ValueExpr(IndexType val) : m_value(val) {} - template - IndexType eval_impl(const T&) const { return m_value; } -protected: - IndexType m_value; -}; - -// Specialization for compile-time value, -// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. -template -class ValueExpr > { -public: - ValueExpr() {} - template - Index eval_impl(const T&) const { return N; } -}; - - -/** \class BaseExpr - * \ingroup Core_Module - * Common base class of any symbolic expressions - */ -template -class BaseExpr -{ -public: - const Derived& derived() const { return *static_cast(this); } - - /** Evaluate the expression given the \a values of the symbols. - * - * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue - * as constructed by SymbolExpr::operator= operator. - * - */ - template - Index eval(const T& values) const { return derived().eval_impl(values); } - -#if EIGEN_HAS_CXX14 - template - Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } -#endif - - NegateExpr operator-() const { return NegateExpr(derived()); } - - AddExpr > operator+(Index b) const - { return AddExpr >(derived(), b); } - AddExpr > operator-(Index a) const - { return AddExpr >(derived(), -a); } - ProductExpr > operator*(Index a) const - { return ProductExpr >(derived(),a); } - QuotientExpr > operator/(Index a) const - { return QuotientExpr >(derived(),a); } - - friend AddExpr > operator+(Index a, const BaseExpr& b) - { return AddExpr >(b.derived(), a); } - friend AddExpr,ValueExpr<> > operator-(Index a, const BaseExpr& b) - { return AddExpr,ValueExpr<> >(-b.derived(), a); } - friend ProductExpr,Derived> operator*(Index a, const BaseExpr& b) - { return ProductExpr,Derived>(a,b.derived()); } - friend QuotientExpr,Derived> operator/(Index a, const BaseExpr& b) - { return QuotientExpr,Derived>(a,b.derived()); } - - template - AddExpr > > operator+(internal::FixedInt) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - AddExpr > > operator-(internal::FixedInt) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - ProductExpr > > operator*(internal::FixedInt) const - { return ProductExpr > >(derived(),ValueExpr >()); } - template - QuotientExpr > > operator/(internal::FixedInt) const - { return QuotientExpr > >(derived(),ValueExpr >()); } - - template - friend AddExpr > > operator+(internal::FixedInt, const BaseExpr& b) - { return AddExpr > >(b.derived(), ValueExpr >()); } - template - friend AddExpr,ValueExpr > > operator-(internal::FixedInt, const BaseExpr& b) - { return AddExpr,ValueExpr > >(-b.derived(), ValueExpr >()); } - template - friend ProductExpr >,Derived> operator*(internal::FixedInt, const BaseExpr& b) - { return ProductExpr >,Derived>(ValueExpr >(),b.derived()); } - template - friend QuotientExpr >,Derived> operator/(internal::FixedInt, const BaseExpr& b) - { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } - -#if (!EIGEN_HAS_CXX14) - template - AddExpr > > operator+(internal::FixedInt (*)()) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - AddExpr > > operator-(internal::FixedInt (*)()) const - { return AddExpr > >(derived(), ValueExpr >()); } - template - ProductExpr > > operator*(internal::FixedInt (*)()) const - { return ProductExpr > >(derived(),ValueExpr >()); } - template - QuotientExpr > > operator/(internal::FixedInt (*)()) const - { return QuotientExpr > 
>(derived(),ValueExpr >()); } - - template - friend AddExpr > > operator+(internal::FixedInt (*)(), const BaseExpr& b) - { return AddExpr > >(b.derived(), ValueExpr >()); } - template - friend AddExpr,ValueExpr > > operator-(internal::FixedInt (*)(), const BaseExpr& b) - { return AddExpr,ValueExpr > >(-b.derived(), ValueExpr >()); } - template - friend ProductExpr >,Derived> operator*(internal::FixedInt (*)(), const BaseExpr& b) - { return ProductExpr >,Derived>(ValueExpr >(),b.derived()); } - template - friend QuotientExpr >,Derived> operator/(internal::FixedInt (*)(), const BaseExpr& b) - { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } -#endif - - - template - AddExpr operator+(const BaseExpr &b) const - { return AddExpr(derived(), b.derived()); } - - template - AddExpr > operator-(const BaseExpr &b) const - { return AddExpr >(derived(), -b.derived()); } - - template - ProductExpr operator*(const BaseExpr &b) const - { return ProductExpr(derived(), b.derived()); } - - template - QuotientExpr operator/(const BaseExpr &b) const - { return QuotientExpr(derived(), b.derived()); } -}; - -template -struct is_symbolic { - // BaseExpr has no conversion ctor, so we only have to check whether T can be staticaly cast to its base class BaseExpr. - enum { value = internal::is_convertible >::value }; -}; - -// Specialization for functions, because is_convertible fails in this case. -// Useful in c++98/11 mode when testing is_symbolic)> -template -struct is_symbolic { - enum { value = false }; -}; - -/** Represents the actual value of a symbol identified by its tag - * - * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used. - */ -template -class SymbolValue -{ -public: - /** Default constructor from the value \a val */ - SymbolValue(Index val) : m_value(val) {} - - /** \returns the stored value of the symbol */ - Index value() const { return m_value; } -protected: - Index m_value; -}; - -/** Expression of a symbol uniquely identified by the template parameter type \c tag */ -template -class SymbolExpr : public BaseExpr > -{ -public: - /** Alias to the template parameter \c tag */ - typedef tag Tag; - - SymbolExpr() {} - - /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. - * - * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified runtime-time value. 
- */ - SymbolValue operator=(Index val) const { - return SymbolValue(val); - } - - Index eval_impl(const SymbolValue &values) const { return values.value(); } - -#if EIGEN_HAS_CXX14 - // C++14 versions suitable for multiple symbols - template - Index eval_impl(const std::tuple& values) const { return std::get >(values).value(); } -#endif -}; - -template -class NegateExpr : public BaseExpr > -{ -public: - NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} - - template - Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } -protected: - Arg0 m_arg0; -}; - -template -class AddExpr : public BaseExpr > -{ -public: - AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template -class ProductExpr : public BaseExpr > -{ -public: - ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template -class QuotientExpr : public BaseExpr > -{ -public: - QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -} // end namespace Symbolic - -} // end namespace Eigen - -#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/eigen/Eigen/src/Core/util/XprHelper.h b/eigen/Eigen/src/Core/util/XprHelper.h index 4b337f2..ba5bd18 100644 --- a/eigen/Eigen/src/Core/util/XprHelper.h +++ b/eigen/Eigen/src/Core/util/XprHelper.h @@ -109,7 +109,6 @@ template class variable_if_dynamic EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; @@ -120,7 +119,6 @@ template class variable_if_dynamic public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; @@ -673,7 +671,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if +template struct scalar_div_cost { enum { value = 8*NumTraits::MulCost }; }; diff --git a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index dbbd480..4fec8af 100644 --- a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -85,7 +85,7 @@ MatrixBase::eigenvalues() const * \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues() */ template -EIGEN_DEVICE_FUNC inline typename SelfAdjointView::EigenvaluesReturnType +inline typename SelfAdjointView::EigenvaluesReturnType SelfAdjointView::eigenvalues() const { typedef typename SelfAdjointView::PlainObject PlainObject; @@ -149,7 +149,7 @@ MatrixBase::operatorNorm() const * \sa eigenvalues(), MatrixBase::operatorNorm() */ template 
-EIGEN_DEVICE_FUNC inline typename SelfAdjointView::RealScalar +inline typename SelfAdjointView::RealScalar SelfAdjointView::operatorNorm() const { return eigenvalues().cwiseAbs().maxCoeff(); diff --git a/eigen/Eigen/src/Geometry/AlignedBox.h b/eigen/Eigen/src/Geometry/AlignedBox.h index c902d8f..066eae4 100644 --- a/eigen/Eigen/src/Geometry/AlignedBox.h +++ b/eigen/Eigen/src/Geometry/AlignedBox.h @@ -63,7 +63,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ EIGEN_DEVICE_FUNC inline AlignedBox() - { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); } + { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. */ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) diff --git a/eigen/Eigen/src/Geometry/ParametrizedLine.h b/eigen/Eigen/src/Geometry/ParametrizedLine.h index 3929ca8..1e985d8 100644 --- a/eigen/Eigen/src/Geometry/ParametrizedLine.h +++ b/eigen/Eigen/src/Geometry/ParametrizedLine.h @@ -104,44 +104,7 @@ public: template EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - */ - template - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - direction() = (mat * direction()).normalized(); - else if (traits==Isometry) - direction() = mat * direction(); - else - { - eigen_assert(0 && "invalid traits value in ParametrizedLine::transform()"); - } - origin() = mat * origin(); - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. - * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - * Other kind of transformations are not supported. - */ - template - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - origin() += t.translation(); - return *this; - } - -/** \returns \c *this with scalar type casted to \a NewScalarType + /** \returns \c *this with scalar type casted to \a NewScalarType * * Note that if \a NewScalarType is equal to the current scalar type of \c *this * then this function smartly returns a const reference to \c *this. diff --git a/eigen/Eigen/src/Geometry/Quaternion.h b/eigen/Eigen/src/Geometry/Quaternion.h index f6ef1bc..3e5a9ba 100644 --- a/eigen/Eigen/src/Geometry/Quaternion.h +++ b/eigen/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. 
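// Illustrative sketch (public Eigen API, not from this patch): the quat_product /
// quat_conj specializations modified in this hunk are the internal kernels dispatched
// to by the ordinary quaternion operators.
#include <Eigen/Geometry>
inline void quaternion_sketch()
{
  Eigen::Quaternionf a(Eigen::AngleAxisf(0.25f * float(EIGEN_PI), Eigen::Vector3f::UnitX()));
  Eigen::Quaternionf b(Eigen::AngleAxisf(0.50f * float(EIGEN_PI), Eigen::Vector3f::UnitZ()));
  Eigen::Quaternionf c  = a * b;           // dispatches to internal::quat_product
  Eigen::Quaternionf ac = a.conjugate();   // dispatches to internal::quat_conj
}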
namespace internal { -template struct quat_product +template struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ return Quaternion @@ -446,8 +446,7 @@ QuaternionBase::operator* (const QuaternionBase& other) c EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits::Alignment, internal::traits::Alignment)>::run(*this, other); + typename internal::traits::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion::Scalar> // Generic conjugate of a Quaternion namespace internal { -template struct quat_conj +template struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& q){ return Quaternion(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ EIGEN_DEVICE_FUNC inline Quaternion::Scalar> QuaternionBase::conjugate() const { return internal::quat_conj::Scalar, - internal::traits::Alignment>::run(*this); + typename internal::traits::Scalar>::run(*this); } diff --git a/eigen/Eigen/src/Geometry/Transform.h b/eigen/Eigen/src/Geometry/Transform.h index 2d36dfa..3f31ee4 100644 --- a/eigen/Eigen/src/Geometry/Transform.h +++ b/eigen/Eigen/src/Geometry/Transform.h @@ -335,7 +335,7 @@ public: OtherModeIsAffineCompact = OtherMode == int(AffineCompact) }; - if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact)) + if(ModeIsAffineCompact == OtherModeIsAffineCompact) { // We need the block expression because the code is compiled for all // combinations of transformations and will trigger a compile time error @@ -343,7 +343,7 @@ public: m_matrix.template block(0,0) = other.matrix().template block(0,0); makeAffine(); } - else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact)) + else if(OtherModeIsAffineCompact) { typedef typename Transform::MatrixType OtherMatrixType; internal::transform_construct_from_matrix::run(this, other.matrix()); @@ -481,7 +481,7 @@ public: TransformTimeDiagonalReturnType res; res.linear().noalias() = a*b.linear(); res.translation().noalias() = a*b.translation(); - if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact))) + if (Mode!=int(AffineCompact)) res.matrix().row(Dim) = b.matrix().row(Dim); return res; } @@ -755,7 +755,7 @@ template Transform& Transform::operator=(const QMatrix& other) { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -801,7 +801,7 @@ Transform& Transform::operator { check_template_params(); EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -819,7 +819,7 @@ template QTransform Transform::toQTransform(void) const { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); @@ -912,7 +912,7 @@ EIGEN_DEVICE_FUNC Transform& Transform::pretranslate(const 
MatrixBase &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective))) + if(int(Mode)==int(Projective)) affine() += other * m_matrix.row(Dim); else translation() += other; diff --git a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff8..f68cab5 100644 --- a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template -struct quat_product +struct quat_product { + enum { + AAlignment = traits::Alignment, + BAlignment = traits::Alignment, + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { Quaternion res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet(0); - __m128 b = _b.coeffs().template packet(0); + __m128 a = _a.coeffs().template packet(0); + __m128 b = _b.coeffs().template packet(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + pstoret( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product } }; -template -struct quat_conj +template +struct quat_conj { + enum { + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& q) { Quaternion res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet(0))); + pstoret(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj template struct cross3_impl { + enum { + ResAlignment = traits::type>::Alignment + }; static inline typename plain_matrix_type::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl -template -struct quat_product +template +struct quat_product { + enum { + BAlignment = traits::Alignment, + ResAlignment = traits >::Alignment + }; + static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product Quaternion res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet(0); - Packet2d b_zw = _b.coeffs().template packet(2); + Packet2d b_xy = _b.coeffs().template packet(0); + Packet2d b_zw = _b.coeffs().template packet(2); Packet2d a_xx = pset1(a[0]); Packet2d a_yy = pset1(a[1]); Packet2d a_zz = pset1(a[2]); @@ -97,9 +114,9 @@ struct quat_product t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + pstoret(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, 
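// Illustrative sketch (the *_sketch helper is made up; pstoret is the internal Eigen
// routine used in this hunk): the Alignment template argument selects an aligned or an
// unaligned store at compile time, which is what lets quat_product / quat_conj honour
// the actual alignment of their operands instead of assuming aligned storage.
#include <Eigen/Core>
template<typename Scalar, typename Packet, int Alignment>
inline void pstoret_sketch(Scalar* to, const Packet& from)
{
  if(Alignment >= int(Eigen::internal::unpacket_traits<Packet>::alignment))
    Eigen::internal::pstore(to, from);    // aligned store
  else
    Eigen::internal::pstoreu(to, from);   // unaligned store
}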
b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template -struct quat_conj +template +struct quat_conj { + enum { + ResAlignment = traits >::Alignment + }; static inline Quaternion run(const QuaternionBase& q) { Quaternion res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet(2))); + pstoret(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet::Alignment>(0))); + pstoret(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet::Alignment>(2))); return res; } }; diff --git a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444a..facdaf8 100644 --- a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; jRealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; jRealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/eigen/Eigen/src/Jacobi/Jacobi.h b/eigen/Eigen/src/Jacobi/Jacobi.h index d25af8e..c30326e 100644 --- a/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/eigen/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x, DenseBase& xpr_y, const JacobiRotation& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits::size }; + enum { + PacketSize = packet_traits::size, + OtherPacketSize = packet_traits::size + }; typedef typename packet_traits::type Packet; + typedef typename packet_traits::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1(c); - const Packet ps = pset1(s); - conj_helper::IsComplex,false> pcj; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; for(Index i=0; i& xpr_x { Packet xi = pload(px); Packet yi = pload(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + 
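// Illustrative sketch (scalar reference for the vectorized loops in this hunk; the
// *_sketch name is made up): per element the plane rotation computes
//   x[i] <-  c * x[i] + conj(s) * y[i]
//   y[i] <- -s * x[i] + conj(c) * y[i]
// The surrounding changes only generalize the packet code so that c and s may have a
// different scalar type (OtherScalar / OtherPacket) than the vector elements.
#include <Eigen/Core>
template<typename Scalar>
inline void plane_rotation_sketch(Scalar* x, Scalar* y, Eigen::Index n,
                                  const Scalar& c, const Scalar& s)
{
  for(Eigen::Index i = 0; i < n; ++i)
  {
    Scalar xi = x[i], yi = y[i];
    x[i] =  c * xi + Eigen::numext::conj(s) * yi;
    y[i] = -s * xi + Eigen::numext::conj(c) * yi;
  }
}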
pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x Packet xi1 = ploadu(px+PacketSize); Packet yi = pload (py); Packet yi1 = pload (py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x { Packet xi = ploadu(x+peelingEnd); Packet yi = pload (y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1(c); - const Packet ps = pset1(s); - conj_helper::IsComplex,false> pcj; + const OtherPacket pc = pset1(c); + const OtherPacket ps = pset1(s); + conj_helper::IsComplex,false> pcj; + conj_helper pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i(px); Packet yi = pload(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/eigen/Eigen/src/LU/FullPivLU.h b/eigen/Eigen/src/LU/FullPivLU.h index ec61086..03b6af7 100644 --- a/eigen/Eigen/src/LU/FullPivLU.h +++ b/eigen/Eigen/src/LU/FullPivLU.h @@ -411,9 +411,11 @@ template class FullPivLU #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; template + EIGEN_DEVICE_FUNC void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/QR/ColPivHouseholderQR.h b/eigen/Eigen/src/QR/ColPivHouseholderQR.h index d35395d..a7b47d5 100644 --- a/eigen/Eigen/src/QR/ColPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/ColPivHouseholderQR.h @@ -416,6 +416,7 @@ template class ColPivHouseholderQR #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif @@ -505,8 +506,8 @@ void ColPivHouseholderQR::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits::epsilon()); + RealScalar threshold_helper = numext::abs2(m_colNormsUpdated.maxCoeff() * NumTraits::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits::epsilon()); 
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -552,12 +553,12 @@ void ColPivHouseholderQR::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. diff --git a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 13b61fc..34c637b 100644 --- a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -367,7 +367,7 @@ class CompleteOrthogonalDecomposition { #ifndef EIGEN_PARSED_BY_DOXYGEN template - void _solve_impl(const RhsType& rhs, DstType& dst) const; + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const; #endif protected: diff --git a/eigen/Eigen/src/QR/FullPivHouseholderQR.h b/eigen/Eigen/src/QR/FullPivHouseholderQR.h index c31e47c..e489bdd 100644 --- a/eigen/Eigen/src/QR/FullPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/FullPivHouseholderQR.h @@ -392,21 +392,22 @@ template class FullPivHouseholderQR * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; diff --git a/eigen/Eigen/src/QR/HouseholderQR.h b/eigen/Eigen/src/QR/HouseholderQR.h index 762b21c..3513d99 100644 --- a/eigen/Eigen/src/QR/HouseholderQR.h +++ b/eigen/Eigen/src/QR/HouseholderQR.h @@ -204,27 +204,28 @@ template class HouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. * * For advanced uses only. 
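// Illustrative sketch (public Eigen API, not from this patch): the QR decompositions
// whose _solve_impl members gain EIGEN_DEVICE_FUNC in these hunks are normally used
// through solve(), e.g.
#include <Eigen/QR>
inline void qr_solve_sketch()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(6);
  Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);
  Eigen::VectorXd x = qr.solve(b);   // least-squares solution of A*x = b
}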
*/ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; diff --git a/eigen/Eigen/src/SVD/BDCSVD.h b/eigen/Eigen/src/SVD/BDCSVD.h index 25fca6f..d7a4271 100644 --- a/eigen/Eigen/src/SVD/BDCSVD.h +++ b/eigen/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; + typedef typename NumTraits::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD::structured_update(Block A, co Index k1=0, k2=0; for(Index j=0; j::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. // To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;kconsiderZero) @@ -691,7 +692,7 @@ template typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? 
col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; // measure everything relative to shift Map diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits::epsilon() * numext::maxi(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. - RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits::epsilon() * numext::maxi(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = 
Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = Literal(0); JacobiRotation J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi(considerZero,NumTraits::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits::epsilon() * numext::maxi(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/eigen/Eigen/src/SVD/SVDBase.h b/eigen/Eigen/src/SVD/SVDBase.h index 4294147..cc90a3b 100644 --- 
a/eigen/Eigen/src/SVD/SVDBase.h +++ b/eigen/Eigen/src/SVD/SVDBase.h @@ -212,6 +212,7 @@ public: #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/SVD/UpperBidiagonalization.h b/eigen/Eigen/src/SVD/UpperBidiagonalization.h index 0b14608..11ac847 100644 --- a/eigen/Eigen/src/SVD/UpperBidiagonalization.h +++ b/eigen/Eigen/src/SVD/UpperBidiagonalization.h @@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, traits::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits::Literal Literal; enum { StorageOrder = traits::Flags & RowMajorBit }; typedef InnerStride ColInnerStride; typedef InnerStride RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h index e0b3c22..5ccb466 100644 --- a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h @@ -185,14 +185,6 @@ class SparseCompressedBase::InnerIterator } inline InnerIterator& operator++() { m_id++; return *this; } - inline InnerIterator& operator+=(Index i) { m_id += i ; return *this; } - - inline InnerIterator operator+(Index i) - { - InnerIterator result = *this; - result += i; - return result; - } inline const Scalar& value() const { return m_values[m_id]; } inline Scalar& valueRef() { return const_cast(m_values[m_id]); } @@ -253,14 +245,6 @@ class SparseCompressedBase::ReverseInnerIterator } inline ReverseInnerIterator& operator--() { --m_id; return *this; } - inline ReverseInnerIterator& operator-=(Index i) { m_id -= i; return *this; } - - inline ReverseInnerIterator operator-(Index i) - { - ReverseInnerIterator result = *this; - result -= i; - return result; - } inline const Scalar& value() const { return m_values[m_id-1]; } inline Scalar& valueRef() { return const_cast(m_values[m_id-1]); } diff --git a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c41c07a..e315e35 100644 --- a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -212,7 +212,8 @@ public: enum { CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -299,7 +300,8 @@ public: enum { CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -531,7 +533,8 @@ public: enum { CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags 
= (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) @@ -605,7 +608,8 @@ public: enum { CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) diff --git a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be7..5ab64f1 100644 --- a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits::RowsAtCompileTime, ColsAtCompileTime = internal::traits::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl dstT(dst); - internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h index 9568cc1..91c09ab 100644 --- a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -320,7 +320,7 @@ class UmfPackLU : public SparseSolverBase > * * \sa umfpackControl() */ - void printUmfpackControl() + void umfpackReportControl() { umfpack_report_control(m_control.data(), Scalar()); } @@ -329,7 +329,7 @@ class UmfPackLU : public SparseSolverBase > * * \sa analyzePattern(), compute() */ - void printUmfpackInfo() + void umfpackReportInfo() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); @@ -339,7 +339,7 @@ class UmfPackLU : public SparseSolverBase > * * \sa analyzePattern(), compute() */ - void printUmfpackStatus() { + void umfpackReportStatus() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); } diff --git a/eigen/Eigen/src/misc/lapacke.h b/eigen/Eigen/src/misc/lapacke.h index 3d8e24f..8c7e79b 100644 --- a/eigen/Eigen/src/misc/lapacke.h +++ b/eigen/Eigen/src/misc/lapacke.h @@ -43,6 +43,10 @@ #include "lapacke_config.h" #endif +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + #include #ifndef lapack_int @@ -104,11 +108,6 @@ lapack_complex_double lapack_make_complex_double( double re, double im ); #endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - #ifndef LAPACKE_malloc #define LAPACKE_malloc( size ) malloc( size ) #endif diff --git a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 43615bd..ebaa3f1 100644 --- a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -10,7 +10,6 @@ typedef CwiseUnaryOp, const Derived> Inverse typedef CwiseUnaryOp, const Derived> BooleanNotReturnType; typedef CwiseUnaryOp, const Derived> ExpReturnType; -typedef CwiseUnaryOp, const Derived> Expm1ReturnType; typedef CwiseUnaryOp, const Derived> LogReturnType; typedef CwiseUnaryOp, const Derived> Log1pReturnType; typedef CwiseUnaryOp, const Derived> 
Log10ReturnType; @@ -91,20 +90,6 @@ exp() const return ExpReturnType(derived()); } -/** \returns an expression of the coefficient-wise exponential of *this minus 1. - * - * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, - * however, with finite precision, this function is much more accurate when \c x is close to zero. - * - * \sa Math functions, exp() - */ -EIGEN_DEVICE_FUNC -inline const Expm1ReturnType -expm1() const -{ - return Expm1ReturnType(derived()); -} - /** \returns an expression of the coefficient-wise logarithm of *this. * * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the @@ -113,7 +98,7 @@ expm1() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa Math functions, log() + * \sa Math functions, exp() */ EIGEN_DEVICE_FUNC inline const LogReturnType diff --git a/eigen/Eigen/src/plugins/BlockMethods.h b/eigen/Eigen/src/plugins/BlockMethods.h index 5caf144..ac35a00 100644 --- a/eigen/Eigen/src/plugins/BlockMethods.h +++ b/eigen/Eigen/src/plugins/BlockMethods.h @@ -42,116 +42,66 @@ template struct ConstFixedSegmentReturnType { typedef const VectorBloc #endif // not EIGEN_PARSED_BY_DOXYGEN -/// \returns an expression of a block in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a block in *this. /// -/// \param startRow the first row in the block -/// \param startCol the first column in the block -/// \param blockRows number of rows in the block, specified at either run-time or compile-time -/// \param blockCols number of columns in the block, specified at either run-time or compile-time -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows the number of rows in the block +/// \param blockCols the number of columns in the block /// -/// Example using runtime (aka dynamic) sizes: \include MatrixBase_block_int_int_int_int.cpp +/// Example: \include MatrixBase_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int_int_int.out /// -/// \newin{3.4}: -/// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. In the later case, \c n plays the role of a runtime fallback value in case \c N equals Eigen::Dynamic. -/// Here is an example with a fixed number of rows \c NRows and dynamic number of columns \c cols: -/// \code -/// mat.block(i,j,fix,cols) -/// \endcode -/// -/// This function thus fully covers the features offered by the following overloads block(Index, Index), -/// and block(Index, Index, Index, Index) that are thus obsolete. Indeed, this generic version avoids -/// redundancy, it preserves the argument order, and prevents the need to rely on the template keyword in templated code. -/// -/// but with less redundancy and more consistency as it does not modify the argument order -/// and seamlessly enable hybrid fixed/dynamic sizes. 
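Usage sketch for the Index-based block() overload restored by this revert (the Eigen::fix / NRowsType variants described in the documentation removed above are not available in this version):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(6, 6);

  // Dynamic-size block: 2 rows and 3 columns starting at row 1, column 2.
  Eigen::MatrixXd B = M.block(1, 2, 2, 3);

  // block() also returns a writable view.
  M.block(1, 2, 2, 3).setZero();
  return 0;
}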
-/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size matrix, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa class Block, fix, fix(int) +/// \sa class Block, block(Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) +inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { - return typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// This is the const version of block(Index,Index,NRowsType,NColsType) -template +/// This is the const version of block(Index,Index,Index,Index). */ EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) const +inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { - return typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns a expression of a top-right corner of \c *this with either dynamic or fixed sizes. + +/// \returns a dynamic-size expression of a top-right corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// -/// Example with dynamic sizes: \include MatrixBase_topRightCorner_int_int.cpp +/// Example: \include MatrixBase_topRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
-/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// This is the const version of topRightCorner(NRowsType, NColsType). -template +/// This is the const version of topRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size top-right corner of \c *this. +/// \returns an expression of a fixed-size top-right corner of *this. /// /// \tparam CRows the number of rows in the corner /// \tparam CCols the number of columns in the corner @@ -178,7 +128,7 @@ inline const typename ConstFixedBlockXpr::Type topRightCorner() con return typename ConstFixedBlockXpr::Type(derived(), 0, cols() - CCols); } -/// \returns an expression of a top-right corner of \c *this. +/// \returns an expression of a top-right corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -212,51 +162,32 @@ inline const typename ConstFixedBlockXpr::Type topRightCorner(Index -/// \returns an expression of a top-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a top-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_topLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
-/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, 0, cRows, cCols); } /// This is the const version of topLeftCorner(Index, Index). -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } -/// \returns an expression of a fixed-size top-left corner of \c *this. +/// \returns an expression of a fixed-size top-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -265,7 +196,7 @@ topLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -282,7 +213,7 @@ inline const typename ConstFixedBlockXpr::Type topLeftCorner() cons return typename ConstFixedBlockXpr::Type(derived(), 0, 0); } -/// \returns an expression of a top-left corner of \c *this. +/// \returns an expression of a top-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -316,53 +247,32 @@ inline const typename ConstFixedBlockXpr::Type topLeftCorner(Index -/// \returns an expression of a bottom-right corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-right corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
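Usage sketch for the corner accessors touched in these hunks, limited to the run-time (Index, Index) and fixed-size template overloads that remain:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(5, 5);

  Eigen::MatrixXd tl = M.topLeftCorner(2, 3);      // rows 0-1, columns 0-2
  Eigen::MatrixXd br = M.bottomRightCorner(2, 2);  // last 2 rows and columns
  M.topRightCorner(1, 2).setZero();                // corners are writable views

  Eigen::Matrix2d bl = M.bottomLeftCorner<2, 2>(); // fixed-size variant
  return 0;
}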
-/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// This is the const version of bottomRightCorner(NRowsType, NColsType). -template +/// This is the const version of bottomRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-right corner of \c *this. +/// \returns an expression of a fixed-size bottom-right corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -371,7 +281,7 @@ bottomRightCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -388,7 +298,7 @@ inline const typename ConstFixedBlockXpr::Type bottomRightCorner() return typename ConstFixedBlockXpr::Type(derived(), rows() - CRows, cols() - CCols); } -/// \returns an expression of a bottom-right corner of \c *this. +/// \returns an expression of a bottom-right corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -422,53 +332,32 @@ inline const typename ConstFixedBlockXpr::Type bottomRightCorner(In -/// \returns an expression of a bottom-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. 
See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// This is the const version of bottomLeftCorner(NRowsType, NColsType). -template +/// This is the const version of bottomLeftCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type -#else -inline typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr::value,internal::get_fixed_value::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-left corner of \c *this. +/// \returns an expression of a fixed-size bottom-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -477,7 +366,7 @@ bottomLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -494,7 +383,7 @@ inline const typename ConstFixedBlockXpr::Type bottomLeftCorner() c return typename ConstFixedBlockXpr::Type(derived(), rows() - CRows, 0); } -/// \returns an expression of a bottom-left corner of \c *this. +/// \returns an expression of a bottom-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -528,50 +417,31 @@ inline const typename ConstFixedBlockXpr::Type bottomLeftCorner(Ind -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_topRows_int.cpp /// Output: \verbinclude MatrixBase_topRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
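Usage sketch for the row-block helpers (topRows / bottomRows / middleRows) in their run-time and fixed-size forms:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(6, 4);

  Eigen::MatrixXd top    = M.topRows(2);     // first 2 rows
  Eigen::MatrixXd bottom = M.bottomRows(3);  // last 3 rows
  M.middleRows(2, 2).setZero();              // rows 2 and 3, modified in place

  Eigen::MatrixXd top2 = M.topRows<2>();     // compile-time row count
  return 0;
}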
-/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) +inline RowsBlockXpr topRows(Index n) { - return typename NRowsBlockXpr::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), 0, 0, n, cols()); } -/// This is the const version of topRows(NRowsType). -template +/// This is the const version of topRows(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) const +inline ConstRowsBlockXpr topRows(Index n) const { - return typename ConstNRowsBlockXpr::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), 0, 0, n, cols()); } -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -584,7 +454,7 @@ topRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -603,50 +473,31 @@ inline typename ConstNRowsBlockXpr::Type topRows(Index n = N) const -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. /// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_bottomRows_int.cpp /// Output: \verbinclude MatrixBase_bottomRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) +inline RowsBlockXpr bottomRows(Index n) { - return typename NRowsBlockXpr::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// This is the const version of bottomRows(NRowsType). -template +/// This is the const version of bottomRows(Index). 
EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) const +inline ConstRowsBlockXpr bottomRows(Index n) const { - return typename ConstNRowsBlockXpr::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -659,7 +510,7 @@ bottomRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -678,51 +529,32 @@ inline typename ConstNRowsBlockXpr::Type bottomRows(Index n = N) const -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. /// /// \param startRow the index of the first row in the block /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include DenseBase_middleRows_int.cpp /// Output: \verbinclude DenseBase_middleRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) +inline RowsBlockXpr middleRows(Index startRow, Index n) { - return typename NRowsBlockXpr::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// This is the const version of middleRows(Index,NRowsType). -template +/// This is the const version of middleRows(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) const +inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const { - return typename ConstNRowsBlockXpr::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. 
/// /// \tparam N the number of rows in the block as specified at compile-time /// \param startRow the index of the first row in the block @@ -736,7 +568,7 @@ middleRows(Index startRow, NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -755,50 +587,31 @@ inline typename ConstNRowsBlockXpr::Type middleRows(Index startRow, Index n = -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_leftCols_int.cpp /// Output: \verbinclude MatrixBase_leftCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) +inline ColsBlockXpr leftCols(Index n) { - return typename NColsBlockXpr::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, 0, rows(), n); } -/// This is the const version of leftCols(NColsType). -template +/// This is the const version of leftCols(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) const +inline ConstColsBlockXpr leftCols(Index n) const { - return typename ConstNColsBlockXpr::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, 0, rows(), n); } -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -811,7 +624,7 @@ leftCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -830,50 +643,31 @@ inline typename ConstNColsBlockXpr::Type leftCols(Index n = N) const -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_rightCols_int.cpp /// Output: \verbinclude MatrixBase_rightCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
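And the corresponding column-block helpers (leftCols / rightCols / middleCols):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd M = Eigen::MatrixXd::Random(4, 6);

  Eigen::MatrixXd left  = M.leftCols(2);   // first 2 columns
  Eigen::MatrixXd right = M.rightCols(3);  // last 3 columns
  M.middleCols(2, 2) *= 2.0;               // columns 2 and 3, scaled in place

  Eigen::MatrixXd left2 = M.leftCols<2>(); // compile-time column count
  return 0;
}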
-/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) +inline ColsBlockXpr rightCols(Index n) { - return typename NColsBlockXpr::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// This is the const version of rightCols(NColsType). -template +/// This is the const version of rightCols(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) const +inline ConstColsBlockXpr rightCols(Index n) const { - return typename ConstNColsBlockXpr::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -886,7 +680,7 @@ rightCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -905,51 +699,32 @@ inline typename ConstNColsBlockXpr::Type rightCols(Index n = N) const -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \param startCol the index of the first column in the block /// \param numCols the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include DenseBase_middleCols_int.cpp /// Output: \verbinclude DenseBase_middleCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) +inline ColsBlockXpr middleCols(Index startCol, Index numCols) { - return typename NColsBlockXpr::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// This is the const version of middleCols(Index,NColsType). -template +/// This is the const version of middleCols(Index,Index). 
EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) const +inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const { - return typename ConstNColsBlockXpr::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param startCol the index of the first column in the block @@ -963,7 +738,7 @@ middleCols(Index startCol, NColsType numCols) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -982,7 +757,7 @@ inline typename ConstNColsBlockXpr::Type middleCols(Index startCol, Index n = -/// \returns a fixed-size expression of a block of \c *this. +/// \returns a fixed-size expression of a block in *this. /// /// The template parameters \a NRows and \a NCols are the number of /// rows and columns in the block. @@ -993,18 +768,12 @@ inline typename ConstNColsBlockXpr::Type middleCols(Index startCol, Index n = /// Example: \include MatrixBase_block_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int.out /// -/// \note The usage of of this overload is discouraged from %Eigen 3.4, better used the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one equivalence: -/// \code -/// mat.template block(i,j) <--> mat.block(i,j,fix,fix) -/// \endcode -/// /// \note since block is a templated member, the keyword template has to be used /// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template EIGEN_DEVICE_FUNC @@ -1021,7 +790,7 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow return typename ConstFixedBlockXpr::Type(derived(), startRow, startCol); } -/// \returns an expression of a block of \c *this. +/// \returns an expression of a block in *this. 
/// /// \tparam NRows number of rows in block as specified at compile-time /// \tparam NCols number of columns in block as specified at compile-time @@ -1038,19 +807,9 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow /// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp /// -/// \note The usage of of this overload is discouraged from %Eigen 3.4, better used the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one complete equivalence: -/// \code -/// mat.template block(i,j,rows,cols) <--> mat.block(i,j,fix(rows),fix(cols)) -/// \endcode -/// If we known that, e.g., NRows==Dynamic and NCols!=Dynamic, then the equivalence becomes: -/// \code -/// mat.template block(i,j,rows,NCols) <--> mat.block(i,j,rows,fix) -/// \endcode -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template inline typename FixedBlockXpr::Type block(Index startRow, Index startCol, @@ -1067,7 +826,7 @@ inline const typename ConstFixedBlockXpr::Type block(Index startRow return typename ConstFixedBlockXpr::Type(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns an expression of the \a i-th column of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_col.cpp /// Output: \verbinclude MatrixBase_col.out @@ -1088,7 +847,7 @@ inline ConstColXpr col(Index i) const return ConstColXpr(derived(), i); } -/// \returns an expression of the \a i-th row of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_row.cpp /// Output: \verbinclude MatrixBase_row.out @@ -1109,153 +868,96 @@ inline ConstRowXpr row(Index i) const return ConstRowXpr(derived(), i); } -/// \returns an expression of a segment (i.e. a vector block) in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. /// /// \only_for_vectors /// /// \param start the first coefficient in the segment /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_segment_int_int.cpp /// Output: \verbinclude MatrixBase_segment_int_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. 
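Vector-only usage sketch for the run-time segment(start, n) overload kept here:

#include <Eigen/Dense>

int main()
{
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(8, 0.0, 7.0);

  Eigen::VectorXd s = v.segment(2, 3);  // coefficients 2, 3 and 4
  v.segment(5, 2).setZero();            // writable view into v
  return 0;
}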
/// -/// \sa block(Index,Index,NRowsType,NColsType), fix, fix(int), class Block +/// \sa class Block, segment(Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) +inline SegmentReturnType segment(Index start, Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), start, n); } -/// This is the const version of segment(Index,NType). -template +/// This is the const version of segment(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) const +inline ConstSegmentReturnType segment(Index start, Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), start, n); } -/// \returns an expression of the first coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the first coefficients of *this. /// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_start_int.cpp /// Output: \verbinclude MatrixBase_start_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// /// \sa class Block, block(Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -head(NType n) +inline SegmentReturnType head(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), 0, n); } -/// This is the const version of head(NType). -template +/// This is the const version of head(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -head(NType n) const +inline ConstSegmentReturnType head(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), 0, n); } -/// \returns an expression of a last coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the last coefficients of *this. 
/// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_end_int.cpp /// Output: \verbinclude MatrixBase_end_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix, -/// or Eigen::fix(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// /// \sa class Block, block(Index,Index) /// -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -tail(NType n) +inline SegmentReturnType tail(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return SegmentReturnType(derived(), this->size() - n, n); } /// This is the const version of tail(Index). -template EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -tail(NType n) const +inline ConstSegmentReturnType tail(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), this->size() - n, n); } /// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this @@ -1272,7 +974,7 @@ tail(NType n) const /// Example: \include MatrixBase_template_int_segment.cpp /// Output: \verbinclude MatrixBase_template_int_segment.out /// -/// \sa segment(Index,NType), class Block +/// \sa class Block /// template EIGEN_DEVICE_FUNC @@ -1291,7 +993,7 @@ inline typename ConstFixedSegmentReturnType::Type segment(Index start, Index return typename ConstFixedSegmentReturnType::Type(derived(), start, n); } -/// \returns a fixed-size expression of the first coefficients of \c *this. +/// \returns a fixed-size expression of the first coefficients of *this. /// /// \only_for_vectors /// @@ -1304,7 +1006,7 @@ inline typename ConstFixedSegmentReturnType::Type segment(Index start, Index /// Example: \include MatrixBase_template_int_start.cpp /// Output: \verbinclude MatrixBase_template_int_start.out /// -/// \sa head(NType), class Block +/// \sa class Block /// template EIGEN_DEVICE_FUNC @@ -1323,7 +1025,7 @@ inline typename ConstFixedSegmentReturnType::Type head(Index n = N) const return typename ConstFixedSegmentReturnType::Type(derived(), 0, n); } -/// \returns a fixed-size expression of the last coefficients of \c *this. +/// \returns a fixed-size expression of the last coefficients of *this. 
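// [Editorial sketch; illustration only, not part of the patch.] The
// vector-only accessors documented above, in their dynamic-size and
// fixed-size (template) forms; the values are assumed for illustration.
#include <Eigen/Dense>
void segment_head_tail_demo() {
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(8, 0.0, 7.0);
  Eigen::VectorXd s = v.segment(2, 3);  // coefficients 2,3,4 (dynamic size)
  Eigen::VectorXd h = v.head(2);        // first two coefficients
  Eigen::VectorXd t = v.tail(2);        // last two coefficients
  Eigen::Vector3d f = v.segment<3>(1);  // fixed-size variant: 3 coefficients starting at 1
}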
/// /// \only_for_vectors /// @@ -1336,7 +1038,7 @@ inline typename ConstFixedSegmentReturnType::Type head(Index n = N) const /// Example: \include MatrixBase_template_int_end.cpp /// Output: \verbinclude MatrixBase_template_int_end.out /// -/// \sa tail(NType), class Block +/// \sa class Block /// template EIGEN_DEVICE_FUNC diff --git a/eigen/Eigen/src/plugins/IndexedViewMethods.h b/eigen/Eigen/src/plugins/IndexedViewMethods.h deleted file mode 100644 index 22c1666..0000000 --- a/eigen/Eigen/src/plugins/IndexedViewMethods.h +++ /dev/null @@ -1,267 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#if !defined(EIGEN_PARSED_BY_DOXYGEN) - -// This file is automatically included twice to generate const and non-const versions - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_CONST const -#define EIGEN_INDEXED_VIEW_METHOD_TYPE ConstIndexedViewType -#else -#define EIGEN_INDEXED_VIEW_METHOD_CONST -#define EIGEN_INDEXED_VIEW_METHOD_TYPE IndexedViewType -#endif - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -protected: - -// define some aliases to ease readability - -template -struct IvcRowType : public internal::IndexedViewCompatibleType {}; - -template -struct IvcColType : public internal::IndexedViewCompatibleType {}; - -template -struct IvcType : public internal::IndexedViewCompatibleType {}; - -typedef typename internal::IndexedViewCompatibleType::type IvcIndex; - -template -typename IvcRowType::type -ivcRow(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic(derived().rows()),Specialized); -} - -template -typename IvcColType::type -ivcCol(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic(derived().cols()),Specialized); -} - -template -typename IvcColType::type -ivcSize(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic(derived().size()),Specialized); -} - -template -struct valid_indexed_view_overload { - // Here we use is_convertible to Index instead of is_integral in order to treat enums as Index. - // In c++11 we could use is_integral && is_enum if is_convertible appears to be too permissive. 
- enum { value = !(internal::is_convertible::value && internal::is_convertible::value) }; -}; - -public: - -#endif - -template -struct EIGEN_INDEXED_VIEW_METHOD_TYPE { - typedef IndexedView::type, - typename IvcColType::type> type; -}; - -// This is the generic version - -template -typename internal::enable_if::value - && internal::traits::type>::ReturnAsIndexedView, - typename EIGEN_INDEXED_VIEW_METHOD_TYPE::type >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return typename EIGEN_INDEXED_VIEW_METHOD_TYPE::type - (derived(), ivcRow(rowIndices), ivcCol(colIndices)); -} - -// The following overload returns a Block<> object - -template -typename internal::enable_if::value - && internal::traits::type>::ReturnAsBlock, - typename internal::traits::type>::BlockType>::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - typedef typename internal::traits::type>::BlockType BlockType; - typename IvcRowType::type actualRowIndices = ivcRow(rowIndices); - typename IvcColType::type actualColIndices = ivcCol(colIndices); - return BlockType(derived(), - internal::first(actualRowIndices), - internal::first(actualColIndices), - internal::size(actualRowIndices), - internal::size(actualColIndices)); -} - -// The following overload returns a Scalar - -template -typename internal::enable_if::value - && internal::traits::type>::ReturnAsScalar, - CoeffReturnType >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// The folowing three overloads are needed to handle raw Index[N] arrays. 
- -template -IndexedView::type> -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView::type> - (derived(), rowIndices, ivcCol(colIndices)); -} - -template -IndexedView::type, const ColIndicesT (&)[ColIndicesN]> -operator()(const RowIndices& rowIndices, const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView::type,const ColIndicesT (&)[ColIndicesN]> - (derived(), ivcRow(rowIndices), colIndices); -} - -template -IndexedView -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView - (derived(), rowIndices, colIndices); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// Overloads for 1D vectors/arrays - -template -typename internal::enable_if< - IsRowMajor && (!(internal::get_compile_time_incr::type>::value==1 || internal::is_integral::value)), - IndexedView::type> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView::type> - (derived(), IvcIndex(0), ivcCol(indices)); -} - -template -typename internal::enable_if< - (!IsRowMajor) && (!(internal::get_compile_time_incr::type>::value==1 || internal::is_integral::value)), - IndexedView::type,IvcIndex> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView::type,IvcIndex> - (derived(), ivcRow(indices), IvcIndex(0)); -} - -template -typename internal::enable_if< - (internal::get_compile_time_incr::type>::value==1) && (!internal::is_integral::value) && (!Symbolic::is_symbolic::value), - VectorBlock::value> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - typename IvcType::type actualIndices = ivcSize(indices); - return VectorBlock::value> - (derived(), internal::first(actualIndices), internal::size(actualIndices)); -} - -template -typename internal::enable_if::value, CoeffReturnType >::type -operator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(id,size())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -template -typename internal::enable_if >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView - (derived(), IvcIndex(0), indices); -} - -template -typename internal::enable_if >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView - (derived(), indices, IvcIndex(0)); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -#undef EIGEN_INDEXED_VIEW_METHOD_CONST -#undef EIGEN_INDEXED_VIEW_METHOD_TYPE - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#include "IndexedViewMethods.h" -#undef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** - * \returns a generic submatrix view defined by the rows and columns indexed \a rowIndices and \a colIndices respectively. 
- * - * Each parameter must either be: - * - An integer indexing a single row or column - * - Eigen::all indexing the full set of respective rows or columns in increasing order - * - An ArithmeticSequence as returned by the Eigen::seq and Eigen::seqN functions - * - Any %Eigen's vector/array of integers or expressions - * - Plain C arrays: \c int[N] - * - And more generally any type exposing the following two member functions: - * \code - * operator[]() const; - * size() const; - * \endcode - * where \c stands for any integer type compatible with Eigen::Index (i.e. \c std::ptrdiff_t). - * - * The last statement implies compatibility with \c std::vector, \c std::valarray, \c std::array, many of the Range-v3's ranges, etc. - * - * If the submatrix can be represented using a starting position \c (i,j) and positive sizes \c (rows,columns), then this - * method will returns a Block object after extraction of the relevant information from the passed arguments. This is the case - * when all arguments are either: - * - An integer - * - Eigen::all - * - An ArithmeticSequence with compile-time increment strictly equal to 1, as returned by Eigen::seq(a,b), and Eigen::seqN(a,N). - * - * Otherwise a more general IndexedView object will be returned, after conversion of the inputs - * to more suitable types \c RowIndices' and \c ColIndices'. - * - * For 1D vectors and arrays, you better use the operator()(const Indices&) overload, which behave the same way but taking a single parameter. - * - * \sa operator()(const Indices&), class Block, class IndexedView, DenseBase::block(Index,Index,Index,Index) - */ -template -IndexedView_or_Block -operator()(const RowIndices& rowIndices, const ColIndices& colIndices); - -/** This is an overload of operator()(const RowIndices&, const ColIndices&) for 1D vectors or arrays - * - * \only_for_vectors - */ -template -IndexedView_or_VectorBlock -operator()(const Indices& indices); - -#endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/eigen/bench/btl/actions/basic_actions.hh b/eigen/bench/btl/actions/basic_actions.hh index 62442f0..a3333ea 100644 --- a/eigen/bench/btl/actions/basic_actions.hh +++ b/eigen/bench/btl/actions/basic_actions.hh @@ -6,7 +6,7 @@ #include "action_atv_product.hh" #include "action_matrix_matrix_product.hh" -#include "action_ata_product.hh" +// #include "action_ata_product.hh" #include "action_aat_product.hh" #include "action_trisolve.hh" diff --git a/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh b/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh index 9e0a649..fc4ba2a 100644 --- a/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh +++ b/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh @@ -46,9 +46,9 @@ public : BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); } - static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ - BLAS_FUNC(syrk)(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); - } +// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ +// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); +// } static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ BLAS_FUNC(syrk)(&lower,¬rans,&N,&N,&fone,A,&N,&fzero,X,&N); diff --git a/eigen/bench/btl/libs/BLAS/main.cpp b/eigen/bench/btl/libs/BLAS/main.cpp index fd99149..564d55e 100644 --- a/eigen/bench/btl/libs/BLAS/main.cpp +++ b/eigen/bench/btl/libs/BLAS/main.cpp @@ -48,7 +48,7 @@ int main() bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); 
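// [Editorial sketch; illustration only, not part of the patch.] The
// IndexedViewMethods.h file deleted above documented a generic
// operator()(rowIndices, colIndices) accepting Eigen::all, seq()/seqN() and
// integer arrays (an Eigen 3.4-dev feature removed by this update). A
// hypothetical comparison with the 3.3-style block() call that remains:
#include <Eigen/Dense>
void indexed_view_vs_block_demo() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 5);
  // Removed 3.4-dev form (kept commented; it no longer compiles against this tree):
  // Eigen::MatrixXd S = A(Eigen::seq(1, 3), Eigen::all);
  // 3.3 equivalent for a contiguous row range over all columns:
  Eigen::MatrixXd S = A.block(1, 0, 3, A.cols());
}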
bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/eigen/bench/btl/libs/STL/STL_interface.hh b/eigen/bench/btl/libs/STL/STL_interface.hh index 16658c4..ef4cc92 100644 --- a/eigen/bench/btl/libs/STL/STL_interface.hh +++ b/eigen/bench/btl/libs/STL/STL_interface.hh @@ -78,18 +78,18 @@ public : cible[i][j]=source[i][j]; } - static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N) - { - real somme; - for (int j=0;j #include -#include // using namespace blaze; #include @@ -81,35 +80,35 @@ public : } } - static EIGEN_DONT_INLINE void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ + static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ X = (A*B); } - static EIGEN_DONT_INLINE void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ + static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ X = (trans(A)*trans(B)); } - static EIGEN_DONT_INLINE void ata_product(const gene_matrix & A, gene_matrix & X, int N){ + static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){ X = (trans(A)*A); } - static EIGEN_DONT_INLINE void aat_product(const gene_matrix & A, gene_matrix & X, int N){ + static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){ X = (A*trans(A)); } - static EIGEN_DONT_INLINE void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ X = (A*B); } - static EIGEN_DONT_INLINE void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ X = (trans(A)*B); } - static EIGEN_DONT_INLINE void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ + static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ Y += coef * X; } - static EIGEN_DONT_INLINE void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ Y = a*X + b*Y; } diff --git a/eigen/bench/btl/libs/blaze/main.cpp b/eigen/bench/btl/libs/blaze/main.cpp index ccae0cb..80e8f4e 100644 --- a/eigen/bench/btl/libs/blaze/main.cpp +++ b/eigen/bench/btl/libs/blaze/main.cpp @@ -30,9 +30,9 @@ int main() bench > >(MIN_MV,MAX_MV,NB_POINT); bench > >(MIN_MV,MAX_MV,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); return 0; } diff --git a/eigen/bench/btl/libs/eigen3/eigen3_interface.hh b/eigen/bench/btl/libs/eigen3/eigen3_interface.hh index 2e302d0..b821fd7 100644 --- a/eigen/bench/btl/libs/eigen3/eigen3_interface.hh +++ b/eigen/bench/btl/libs/eigen3/eigen3_interface.hh @@ -92,11 +92,9 @@ public : X.noalias() = A.transpose()*B.transpose(); } - static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ - //X.noalias() = A.transpose()*A; - X.template triangularView().setZero(); - X.template selfadjointView().rankUpdate(A.transpose()); - } +// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ +// X.noalias() = A.transpose()*A; +// 
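// [Editorial sketch; illustration only, not part of the patch.] The eigen3
// ata_product commented out above computed X = A^T * A through a symmetric
// rank update instead of a full product; a standalone equivalent, assuming
// the lower triangle is the stored half (the template argument is not
// visible in this copy of the diff):
#include <Eigen/Dense>
void ata_via_rank_update(const Eigen::MatrixXd& A, Eigen::MatrixXd& X) {
  X.setZero(A.cols(), A.cols());
  // accumulate A^T * A into the lower triangle only; the upper part stays zero
  X.selfadjointView<Eigen::Lower>().rankUpdate(A.transpose());
}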
} static inline void aat_product(const gene_matrix & A, gene_matrix & X, int /*N*/){ X.template triangularView().setZero(); diff --git a/eigen/bench/btl/libs/eigen3/main_matmat.cpp b/eigen/bench/btl/libs/eigen3/main_matmat.cpp index 052810a..926fa2b 100644 --- a/eigen/bench/btl/libs/eigen3/main_matmat.cpp +++ b/eigen/bench/btl/libs/eigen3/main_matmat.cpp @@ -25,7 +25,7 @@ BTL_MAIN; int main() { bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); bench > >(MIN_MM,MAX_MM,NB_POINT); diff --git a/eigen/bench/perf_monitoring/changesets.txt b/eigen/bench/perf_monitoring/changesets.txt deleted file mode 100644 index 960699c..0000000 --- a/eigen/bench/perf_monitoring/changesets.txt +++ /dev/null @@ -1,71 +0,0 @@ -#3.0.1 -#3.1.1 -#3.2.0 -3.2.4 -#5745:37f59e65eb6c -5891:d8652709345d # introduce AVX -#5893:24b4dc92c6d3 # merge -5895:997c2ef9fc8b # introduce FMA -#5904:e1eafd14eaa1 # complex and AVX -5908:f8ee3c721251 # improve packing with ptranspose -#5921:ca808bb456b0 # merge -#5927:8b1001f9e3ac -5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks -#5949:f3488f4e45b2 # merge -#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec -#5992:4a429f5e0483 # merge -before-evaluators -#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products -#6639:c9121c60b5c7 -#6655:06f163b5221f # Properly detect FMA support on ARM -#6677:700e023044e7 # FMA has been wrongly disabled -#6681:11d31dafb0e3 -#6699:5e6e8e10aad1 # merge default to tensors -#6726:ff2d2388e7b9 # merge default to tensors -#6742:0cbd6195e829 # merge default to tensors -#6747:853d2bafeb8f # Generalized the gebp apis -6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation -6781:9cc5a931b2c6 # generalized gemv -6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product -#6844:039efd86b75c # merge tensor -6845:7333ed40c6ef # change prefetching in gebp -#6856:b5be5e10eb7f # merge index conversion -6893:c3a64aba7c70 # clean blocking size computation -6899:877facace746 # rotating kernel for ARM only -#6904:c250623ae9fa # result_of -6921:915f1b1fc158 # fix prefetching change for ARM -6923:9ff25f6dacc6 # prefetching -6933:52572e60b5d3 # blocking size strategy -6937:c8c042f286b2 # avoid redundant pack_rhs -6981:7e5d6f78da59 # dynamic loop swapping -6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache -6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. -7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) -7015:8aad8f35c955 # Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables -7016:a58d253e8c91 # Polish lookup tables generation -7018:9b27294a8186 # actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment -7019:c758b1e2c073 # Provide a empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now. -7085:627e039fba68 # Bug 986: add support for coefficient-based product with 0 depth. 
-7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code -7591:09a8e2186610 # 3.3-alpha1 -7650:b0f3c8f43025 # help clang inlining -7708:dfc6ab9d9458 # Improve numerical accuracy in LLT and triangular solve by using true scalar divisions (instead of x * (1/y)) -#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) -8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes -8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path -8985:d935df21a082 # Remove the rotating kernel. -8988:6c2dc56e73b3 # Bug 256: enable vectorization with unaligned loads/stores. -9148:b8b8c421e36c # Relax mixing-type constraints for binary coefficient-wise operators -9174:d228bc282ac9 # merge -9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 -9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 -9361:69d418c06999 # 3.3-beta2 -9583:bef509908b9d # 3.3-rc1 -9792:26667be4f70b # 3.3.0 -9942:b1d3eba60130 # Operators += and -= do not resize! -9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec -9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU. -9955:630471c3298c # Improve performance of row-major-dense-matrix * vector products for recent CPUs. (this is the next changeset fixing a typo) -9975:2eeed9de710c # Revert vec/y to vec*(1/y) in row-major TRSM - - diff --git a/eigen/bench/perf_monitoring/gemm.cpp b/eigen/bench/perf_monitoring/gemm.cpp deleted file mode 100644 index 804139d..0000000 --- a/eigen/bench/perf_monitoring/gemm.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "gemm_common.h" - -EIGEN_DONT_INLINE -void gemm(const Mat &A, const Mat &B, Mat &C) -{ - C.noalias() += A * B; -} - -int main(int argc, char **argv) -{ - return main_gemm(argc, argv, gemm); -} diff --git a/eigen/bench/perf_monitoring/gemm/changesets.txt b/eigen/bench/perf_monitoring/gemm/changesets.txt new file mode 100644 index 0000000..af8eb9b --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/changesets.txt @@ -0,0 +1,61 @@ +#3.0.1 +#3.1.1 +#3.2.0 +3.2.4 +#5745:37f59e65eb6c +5891:d8652709345d # introduce AVX +#5893:24b4dc92c6d3 # merge +5895:997c2ef9fc8b # introduce FMA +#5904:e1eafd14eaa1 # complex and AVX +5908:f8ee3c721251 # improve packing with ptranspose +#5921:ca808bb456b0 # merge +#5927:8b1001f9e3ac +5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks +#5949:f3488f4e45b2 # merge +#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec +#5992:4a429f5e0483 # merge +before-evaluators +#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products +#6639:c9121c60b5c7 +#6655:06f163b5221f # Properly detect FMA support on ARM +#6677:700e023044e7 # FMA has been wrongly disabled +#6681:11d31dafb0e3 +#6699:5e6e8e10aad1 # merge default to tensors +#6726:ff2d2388e7b9 # merge default to tensors +#6742:0cbd6195e829 # merge default to tensors +#6747:853d2bafeb8f # Generalized the gebp apis +6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used 
for a computation +#6781:9cc5a931b2c6 # generalized gemv +#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +#6844:039efd86b75c # merge tensor +6845:7333ed40c6ef # change prefetching in gebp +#6856:b5be5e10eb7f # merge index conversion +#6893:c3a64aba7c70 # clean blocking size computation +#6898:6fb31ebe6492 # rotating kernel for ARM +6899:877facace746 # rotating kernel for ARM only +#6904:c250623ae9fa # result_of +6921:915f1b1fc158 # fix prefetching change for ARM +6923:9ff25f6dacc6 # prefetching +6933:52572e60b5d3 # blocking size strategy +6937:c8c042f286b2 # avoid redundant pack_rhs +6981:7e5d6f78da59 # dynamic loop swapping +6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache +6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. +7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) +7015:8aad8f35c955 # Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables +7016:a58d253e8c91 # Polish lookup tables generation +7018:9b27294a8186 # actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment +7019:c758b1e2c073 # Provide a empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now. +7085:627e039fba68 # Bug 986: add support for coefficient-based product with 0 depth. +7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code +7591:09a8e2186610 # 3.3-alpha1 +7650:b0f3c8f43025 # help clang inlining +#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. +8988:6c2dc56e73b3 # Bug 256: enable vectorization with unaligned loads/stores. +9148:b8b8c421e36c # Relax mixing-type constraints for binary coefficient-wise operators +9174:d228bc282ac9 # merge +9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 +9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 diff --git a/eigen/bench/perf_monitoring/gemm/gemm.cpp b/eigen/bench/perf_monitoring/gemm/gemm.cpp new file mode 100644 index 0000000..614bd47 --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/gemm.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix Mat; + +EIGEN_DONT_INLINE +void gemm(const Mat &A, const Mat &B, Mat &C) +{ + C.noalias() += A * B; +} + +EIGEN_DONT_INLINE +double bench(long m, long n, long k) +{ + Mat A(m,k); + Mat B(k,n); + Mat C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e8*4/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. 
* m * n * k; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("gemm_settings.txt"); + long m, n, k; + while(settings >> m >> n >> k) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench(m, n, k) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/eigen/bench/perf_monitoring/gemm/gemm_settings.txt b/eigen/bench/perf_monitoring/gemm/gemm_settings.txt new file mode 100644 index 0000000..5c43e1c --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/gemm_settings.txt @@ -0,0 +1,15 @@ +8 8 8 +9 9 9 +24 24 24 +239 239 239 +240 240 240 +2400 24 24 +24 2400 24 +24 24 2400 +24 2400 2400 +2400 24 2400 +2400 2400 24 +2400 2400 64 +4800 23 160 +23 4800 160 +2400 2400 2400 diff --git a/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp b/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp new file mode 100644 index 0000000..6dc3701 --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +template +EIGEN_DONT_INLINE +void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +{ +// escape((void*)A.data()); +// escape((void*)B.data()); + C.noalias() += A.lazyProduct(B); +// escape((void*)C.data()); +} + +template +EIGEN_DONT_INLINE +double bench() +{ + typedef Matrix MatA; + typedef Matrix MatB; + typedef Matrix MatC; + + MatA A(m,k); + MatB B(k,n); + MatC C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e7*4/sizeof(Scalar); + double tm0 = 10, tm1 = 20; + + double flops = 2. 
* m * n * k; + long rep = std::max(10., std::min(10000., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, lazy_gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +double bench_t(int t) +{ + if(t) + return bench(); + else + return bench(); +} + +EIGEN_DONT_INLINE +double bench_mnk(int m, int n, int k, int t) +{ + int id = m*10000 + n*100 + k; + switch(id) { + case 10101 : return bench_t< 1, 1, 1>(t); break; + case 20202 : return bench_t< 2, 2, 2>(t); break; + case 30303 : return bench_t< 3, 3, 3>(t); break; + case 40404 : return bench_t< 4, 4, 4>(t); break; + case 50505 : return bench_t< 5, 5, 5>(t); break; + case 60606 : return bench_t< 6, 6, 6>(t); break; + case 70707 : return bench_t< 7, 7, 7>(t); break; + case 80808 : return bench_t< 8, 8, 8>(t); break; + case 90909 : return bench_t< 9, 9, 9>(t); break; + case 101010 : return bench_t<10,10,10>(t); break; + case 111111 : return bench_t<11,11,11>(t); break; + case 121212 : return bench_t<12,12,12>(t); break; + } + return 0; +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("lazy_gemm_settings.txt"); + long m, n, k, t; + while(settings >> m >> n >> k >> t) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench_mnk(m, n, k, t) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt b/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt new file mode 100644 index 0000000..407d5d4 --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt @@ -0,0 +1,15 @@ +1 1 1 0 +2 2 2 0 +3 3 3 0 +4 4 4 0 +4 4 4 1 +5 5 5 0 +6 6 6 0 +7 7 7 0 +7 7 7 1 +8 8 8 0 +9 9 9 0 +10 10 10 0 +11 11 11 0 +12 12 12 0 +12 12 12 1 diff --git a/eigen/bench/perf_monitoring/gemm/make_plot.sh b/eigen/bench/perf_monitoring/gemm/make_plot.sh new file mode 100644 index 0000000..cd3214a --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/make_plot.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# base name of the bench +# it reads $1.out +# and generates $1.pdf +WHAT=$1 +bench=$2 + +header="rev " +while read line +do + if [ ! 
-z '$line' ]; then + header="$header \"$line\"" + fi +done < $bench"_settings.txt" + +echo $header > $WHAT.out.header +cat $WHAT.out >> $WHAT.out.header + + +echo "set title '$WHAT'" > $WHAT.gnuplot +echo "set key autotitle columnhead outside " >> $WHAT.gnuplot +echo "set xtics rotate 1" >> $WHAT.gnuplot + +echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot +echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot + +col=`cat $bench"_settings.txt" | wc -l` +echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot +echo " " >> $WHAT.gnuplot + +gnuplot -persist < $WHAT.gnuplot + +# generate a png file +# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png + +# clean +rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file diff --git a/eigen/bench/perf_monitoring/gemm/run.sh b/eigen/bench/perf_monitoring/gemm/run.sh new file mode 100644 index 0000000..9d6ee40 --- /dev/null +++ b/eigen/bench/perf_monitoring/gemm/run.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# ./run.sh gemm +# ./run.sh lazy_gemm + +# Examples of environment variables to be set: +# PREFIX="haswell-fma-" +# CXX_FLAGS="-mfma" + +# Options: +# -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests + +bench=$1 + +if echo "$*" | grep '\-up' > /dev/null; then + update=true +else + update=false +fi + +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then + echo "(Re-)Compute all changesets and keep bests" +else + echo "Skip previously computed changesets" +fi + + + +if [ ! -d "eigen_src" ]; then + hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. +fi + +if [ ! 
-z '$CXX' ]; then + CXX=g++ +fi + +function make_backup +{ + if [ -f "$1.out" ]; then + mv "$1.out" "$1.backup" + fi +} + +function merge +{ + count1=`echo $1 | wc -w` + count2=`echo $2 | wc -w` + + if [ $count1 == $count2 ]; then + a=( $1 ); b=( $2 ) + res="" + for (( i=0 ; i<$count1 ; i++ )); do + ai=${a[$i]}; bi=${b[$i]} + tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l` + res="$res $tmp" + done + echo $res + + else + echo $1 + fi +} + +function test_current +{ + rev=$1 + scalar=$2 + name=$3 + + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi + res=$prev + count_rev=`echo $prev | wc -w` + count_ref=`cat $bench"_settings.txt" | wc -l` + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then + curr=`./$name` + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi + res=`merge "$curr" "$prev"` +# echo $res + echo "$rev $res" >> $name.out + else + echo "Compilation failed, skip rev $rev" + fi + else + echo "Skip existing results for $rev / $name" + echo "$rev $res" >> $name.out + fi +} + +make_backup $PREFIX"s"$bench +make_backup $PREFIX"d"$bench +make_backup $PREFIX"c"$bench + +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev +do + if [ ! -z '$rev' ]; then + echo "Testing rev $rev" + cd eigen_src + hg up -C $rev > /dev/null + actual_rev=`hg identify | cut -f1 -d' '` + cd .. + + test_current $actual_rev float $PREFIX"s"$bench + test_current $actual_rev double $PREFIX"d"$bench + test_current $actual_rev "std::complex" $PREFIX"c"$bench + fi + +done + +echo "Float:" +cat $PREFIX"s""$bench.out" +echo " " + +echo "Double:" +cat $PREFIX"d""$bench.out" +echo "" + +echo "Complex:" +cat $PREFIX"c""$bench.out" +echo "" + +./make_plot.sh $PREFIX"s"$bench $bench +./make_plot.sh $PREFIX"d"$bench $bench +./make_plot.sh $PREFIX"c"$bench $bench + + diff --git a/eigen/bench/perf_monitoring/gemm_common.h b/eigen/bench/perf_monitoring/gemm_common.h deleted file mode 100644 index 30dbc0d..0000000 --- a/eigen/bench/perf_monitoring/gemm_common.h +++ /dev/null @@ -1,67 +0,0 @@ -#include -#include -#include -#include -#include "eigen_src/Eigen/Core" -#include "../BenchTimer.h" -using namespace Eigen; - -#ifndef SCALAR -#error SCALAR must be defined -#endif - -typedef SCALAR Scalar; - -typedef Matrix Mat; - -template -EIGEN_DONT_INLINE -double bench(long m, long n, long k, const Func& f) -{ - Mat A(m,k); - Mat B(k,n); - Mat C(m,n); - A.setRandom(); - B.setRandom(); - C.setZero(); - - BenchTimer t; - - double up = 1e8*4/sizeof(Scalar); - double tm0 = 4, tm1 = 10; - if(NumTraits::IsComplex) - { - up /= 4; - tm0 = 2; - tm1 = 4; - } - - double flops = 2. 
* m * n * k; - long rep = std::max(1., std::min(100., up/flops) ); - long tries = std::max(tm0, std::min(tm1, up/flops) ); - - BENCH(t, tries, rep, f(A,B,C)); - - return 1e-9 * rep * flops / t.best(); -} - -template -int main_gemm(int argc, char **argv, const Func& f) -{ - std::vector results; - - std::string filename = std::string("gemm_settings.txt"); - if(argc>1) - filename = std::string(argv[1]); - std::ifstream settings(filename); - long m, n, k; - while(settings >> m >> n >> k) - { - //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; - results.push_back( bench(m, n, k, f) ); - } - - std::cout << RowVectorXd::Map(results.data(), results.size()); - - return 0; -} diff --git a/eigen/bench/perf_monitoring/gemm_settings.txt b/eigen/bench/perf_monitoring/gemm_settings.txt deleted file mode 100644 index 5c43e1c..0000000 --- a/eigen/bench/perf_monitoring/gemm_settings.txt +++ /dev/null @@ -1,15 +0,0 @@ -8 8 8 -9 9 9 -24 24 24 -239 239 239 -240 240 240 -2400 24 24 -24 2400 24 -24 24 2400 -24 2400 2400 -2400 24 2400 -2400 2400 24 -2400 2400 64 -4800 23 160 -23 4800 160 -2400 2400 2400 diff --git a/eigen/bench/perf_monitoring/gemm_square_settings.txt b/eigen/bench/perf_monitoring/gemm_square_settings.txt deleted file mode 100644 index 98474d1..0000000 --- a/eigen/bench/perf_monitoring/gemm_square_settings.txt +++ /dev/null @@ -1,11 +0,0 @@ -8 8 8 -9 9 9 -12 12 12 -15 15 15 -16 16 16 -24 24 24 -102 102 102 -239 239 239 -240 240 240 -2400 2400 2400 -2463 2463 2463 diff --git a/eigen/bench/perf_monitoring/gemv.cpp b/eigen/bench/perf_monitoring/gemv.cpp deleted file mode 100644 index 82e5ab9..0000000 --- a/eigen/bench/perf_monitoring/gemv.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "gemv_common.h" - -EIGEN_DONT_INLINE -void gemv(const Mat &A, const Vec &B, Vec &C) -{ - C.noalias() += A * B; -} - -int main(int argc, char **argv) -{ - return main_gemv(argc, argv, gemv); -} diff --git a/eigen/bench/perf_monitoring/gemv_common.h b/eigen/bench/perf_monitoring/gemv_common.h deleted file mode 100644 index cc32577..0000000 --- a/eigen/bench/perf_monitoring/gemv_common.h +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include -#include -#include -#include "eigen_src/Eigen/Core" -#include "../BenchTimer.h" -using namespace Eigen; - -#ifndef SCALAR -#error SCALAR must be defined -#endif - -typedef SCALAR Scalar; - -typedef Matrix Mat; -typedef Matrix Vec; - -template -EIGEN_DONT_INLINE -double bench(long m, long n, Func &f) -{ - Mat A(m,n); - Vec B(n); - Vec C(m); - A.setRandom(); - B.setRandom(); - C.setRandom(); - - BenchTimer t; - - double up = 1e8/sizeof(Scalar); - double tm0 = 4, tm1 = 10; - if(NumTraits::IsComplex) - { - up /= 4; - tm0 = 2; - tm1 = 4; - } - - double flops = 2. 
* m * n; - long rep = std::max(1., std::min(100., up/flops) ); - long tries = std::max(tm0, std::min(tm1, up/flops) ); - - BENCH(t, tries, rep, f(A,B,C)); - - return 1e-9 * rep * flops / t.best(); -} - -template -int main_gemv(int argc, char **argv, Func& f) -{ - std::vector results; - - std::string filename = std::string("gemv_settings.txt"); - if(argc>1) - filename = std::string(argv[1]); - std::ifstream settings(filename); - long m, n; - while(settings >> m >> n) - { - //std::cerr << " Testing " << m << " " << n << std::endl; - results.push_back( bench(m, n, f) ); - } - - std::cout << RowVectorXd::Map(results.data(), results.size()); - - return 0; -} diff --git a/eigen/bench/perf_monitoring/gemv_settings.txt b/eigen/bench/perf_monitoring/gemv_settings.txt deleted file mode 100644 index 21a5ee0..0000000 --- a/eigen/bench/perf_monitoring/gemv_settings.txt +++ /dev/null @@ -1,11 +0,0 @@ -8 8 -9 9 -24 24 -239 239 -240 240 -2400 24 -24 2400 -24 240 -2400 2400 -4800 23 -23 4800 diff --git a/eigen/bench/perf_monitoring/gemv_square_settings.txt b/eigen/bench/perf_monitoring/gemv_square_settings.txt deleted file mode 100644 index 5165759..0000000 --- a/eigen/bench/perf_monitoring/gemv_square_settings.txt +++ /dev/null @@ -1,13 +0,0 @@ -8 8 -9 9 -12 12 -15 15 -16 16 -24 24 -53 53 -74 74 -102 102 -239 239 -240 240 -2400 2400 -2463 2463 diff --git a/eigen/bench/perf_monitoring/gemvt.cpp b/eigen/bench/perf_monitoring/gemvt.cpp deleted file mode 100644 index fe94576..0000000 --- a/eigen/bench/perf_monitoring/gemvt.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "gemv_common.h" - -EIGEN_DONT_INLINE -void gemv(const Mat &A, Vec &B, const Vec &C) -{ - B.noalias() += A.transpose() * C; -} - -int main(int argc, char **argv) -{ - return main_gemv(argc, argv, gemv); -} diff --git a/eigen/bench/perf_monitoring/lazy_gemm.cpp b/eigen/bench/perf_monitoring/lazy_gemm.cpp deleted file mode 100644 index 7733060..0000000 --- a/eigen/bench/perf_monitoring/lazy_gemm.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include -#include -#include -#include -#include "../../BenchTimer.h" -using namespace Eigen; - -#ifndef SCALAR -#error SCALAR must be defined -#endif - -typedef SCALAR Scalar; - -template -EIGEN_DONT_INLINE -void lazy_gemm(const MatA &A, const MatB &B, MatC &C) -{ -// escape((void*)A.data()); -// escape((void*)B.data()); - C.noalias() += A.lazyProduct(B); -// escape((void*)C.data()); -} - -template -EIGEN_DONT_INLINE -double bench() -{ - typedef Matrix MatA; - typedef Matrix MatB; - typedef Matrix MatC; - - MatA A(m,k); - MatB B(k,n); - MatC C(m,n); - A.setRandom(); - B.setRandom(); - C.setZero(); - - BenchTimer t; - - double up = 1e7*4/sizeof(Scalar); - double tm0 = 10, tm1 = 20; - - double flops = 2. 
* m * n * k; - long rep = std::max(10., std::min(10000., up/flops) ); - long tries = std::max(tm0, std::min(tm1, up/flops) ); - - BENCH(t, tries, rep, lazy_gemm(A,B,C)); - - return 1e-9 * rep * flops / t.best(); -} - -template -double bench_t(int t) -{ - if(t) - return bench(); - else - return bench(); -} - -EIGEN_DONT_INLINE -double bench_mnk(int m, int n, int k, int t) -{ - int id = m*10000 + n*100 + k; - switch(id) { - case 10101 : return bench_t< 1, 1, 1>(t); break; - case 20202 : return bench_t< 2, 2, 2>(t); break; - case 30303 : return bench_t< 3, 3, 3>(t); break; - case 40404 : return bench_t< 4, 4, 4>(t); break; - case 50505 : return bench_t< 5, 5, 5>(t); break; - case 60606 : return bench_t< 6, 6, 6>(t); break; - case 70707 : return bench_t< 7, 7, 7>(t); break; - case 80808 : return bench_t< 8, 8, 8>(t); break; - case 90909 : return bench_t< 9, 9, 9>(t); break; - case 101010 : return bench_t<10,10,10>(t); break; - case 111111 : return bench_t<11,11,11>(t); break; - case 121212 : return bench_t<12,12,12>(t); break; - } - return 0; -} - -int main(int argc, char **argv) -{ - std::vector results; - - std::string filename = std::string("lazy_gemm_settings.txt"); - if(argc>1) - filename = std::string(argv[1]); - std::ifstream settings(filename); - long m, n, k, t; - while(settings >> m >> n >> k >> t) - { - //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; - results.push_back( bench_mnk(m, n, k, t) ); - } - - std::cout << RowVectorXd::Map(results.data(), results.size()); - - return 0; -} diff --git a/eigen/bench/perf_monitoring/lazy_gemm_settings.txt b/eigen/bench/perf_monitoring/lazy_gemm_settings.txt deleted file mode 100644 index 407d5d4..0000000 --- a/eigen/bench/perf_monitoring/lazy_gemm_settings.txt +++ /dev/null @@ -1,15 +0,0 @@ -1 1 1 0 -2 2 2 0 -3 3 3 0 -4 4 4 0 -4 4 4 1 -5 5 5 0 -6 6 6 0 -7 7 7 0 -7 7 7 1 -8 8 8 0 -9 9 9 0 -10 10 10 0 -11 11 11 0 -12 12 12 0 -12 12 12 1 diff --git a/eigen/bench/perf_monitoring/llt.cpp b/eigen/bench/perf_monitoring/llt.cpp deleted file mode 100644 index d55b7d8..0000000 --- a/eigen/bench/perf_monitoring/llt.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "gemm_common.h" -#include - -EIGEN_DONT_INLINE -void llt(const Mat &A, const Mat &B, Mat &C) -{ - C = A; - C.diagonal().array() += 1000; - Eigen::internal::llt_inplace::blocked(C); -} - -int main(int argc, char **argv) -{ - return main_gemm(argc, argv, llt); -} diff --git a/eigen/bench/perf_monitoring/make_plot.sh b/eigen/bench/perf_monitoring/make_plot.sh deleted file mode 100644 index ca9fa96..0000000 --- a/eigen/bench/perf_monitoring/make_plot.sh +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash - -# base name of the bench -# it reads $1.out -# and generates $1.pdf -WHAT=$1 -bench=$2 -settings_file=$3 - -header="rev " -while read line -do - if [ ! 
-z '$line' ]; then - header="$header \"$line\"" - fi -done < $settings_file - -echo $header > $WHAT.out.header -cat $WHAT.out >> $WHAT.out.header - - -echo "set title '$WHAT'" > $WHAT.gnuplot -echo "set key autotitle columnhead outside " >> $WHAT.gnuplot -echo "set xtics rotate 1" >> $WHAT.gnuplot - -echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot -echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot - -col=`cat $settings_file | wc -l` -echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot -echo " " >> $WHAT.gnuplot - -gnuplot -persist < $WHAT.gnuplot - -# generate a png file (thumbnail) -convert -colors 256 -background white -density 300 -resize 300 -quality 0 $WHAT.pdf -background white -flatten $WHAT.png - -# clean -rm $WHAT.out.header $WHAT.gnuplot - - -# generate html/svg graph - -echo " " > $WHAT.html -cat resources/chart_header.html > $WHAT.html -echo 'var customSettings = {"TITLE":"","SUBTITLE":"","XLABEL":"","YLABEL":""};' >> $WHAT.html -# 'data' is an array of datasets (i.e. curves), each of which is an object of the form -# { -# key: , -# color: , -# values: [{ -# r: , -# v: -# }] -# } -echo 'var data = [' >> $WHAT.html - -col=2 -while read line -do - if [ ! -z '$line' ]; then - header="$header \"$line\"" - echo '{"key":"'$line'","values":[' >> $WHAT.html - i=0 - while read line2 - do - if [ ! -z '$line2' ]; then - echo '{"r":'$i',"v":'`echo $line2 | cut -f $col -d ' '`'},' >> $WHAT.html - fi - ((i++)) - done < $WHAT.out - echo ']},' >> $WHAT.html - fi - ((col++)) -done < $settings_file -echo '];' >> $WHAT.html - -echo 'var changesets = [' >> $WHAT.html -while read line2 -do - if [ ! -z '$line2' ]; then - echo '"'`echo $line2 | cut -f 1 -d ' '`'",' >> $WHAT.html - fi -done < $WHAT.out -echo '];' >> $WHAT.html - -echo 'var changesets_count = [' >> $WHAT.html -i=0 -while read line2 -do - if [ ! 
-z '$line2' ]; then - echo $i ',' >> $WHAT.html - fi - ((i++)) -done < $WHAT.out -echo '];' >> $WHAT.html - -cat resources/chart_footer.html >> $WHAT.html diff --git a/eigen/bench/perf_monitoring/resources/chart_footer.html b/eigen/bench/perf_monitoring/resources/chart_footer.html deleted file mode 100644 index 8acc69f..0000000 --- a/eigen/bench/perf_monitoring/resources/chart_footer.html +++ /dev/null @@ -1,37 +0,0 @@ - /* setup the chart and its options */ - var chart = nv.models.lineChart() - .color(d3.scale.category10().range()) - .margin({left: 75, bottom: 100}) - .forceX([0]).forceY([0]); - - chart.x(function(datum){ return datum.r; }) - .xAxis.options({ - axisLabel: customSettings.XLABEL || 'Changeset', - tickFormat: d3.format('.0f') - }); - chart.xAxis - .tickValues(changesets_count) - .tickFormat(function(d){return changesets[d]}) - .rotateLabels(-90); - - chart.y(function(datum){ return datum.v; }) - .yAxis.options({ - axisLabel: customSettings.YLABEL || 'GFlops'/*, - tickFormat: function(val){ return d3.format('.0f')(val) + ' GFlops'; }*/ - }); - - //chart.useInteractiveGuideline(true); - d3.select('#chart').datum(data).call(chart); - var plot = d3.select('#chart > g'); - - /* setup the title */ - plot.append('text') - .style('font-size', '24px') - .attr('text-anchor', 'middle').attr('x', '50%').attr('y', '20px') - .text(customSettings.TITLE || ''); - - /* ensure the chart is responsive */ - nv.utils.windowResize(chart.update); - - - \ No newline at end of file diff --git a/eigen/bench/perf_monitoring/resources/chart_header.html b/eigen/bench/perf_monitoring/resources/chart_header.html deleted file mode 100644 index bb9ddff..0000000 --- a/eigen/bench/perf_monitoring/resources/chart_header.html +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - -
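// [Editorial sketch; illustration only, not part of the patch.] The gemm.cpp
// and lazy_gemm.cpp benchmarks added above report throughput as
// 1e-9 * rep * flops / best_time, with flops = 2*m*n*k for one product.
// A minimal, self-contained version of that arithmetic using std::chrono in
// place of the patch's BenchTimer:
#include <Eigen/Dense>
#include <chrono>
#include <iostream>
int main() {
  const long m = 240, n = 240, k = 240, rep = 100;
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(m, k);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(k, n);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(m, n);
  auto t0 = std::chrono::steady_clock::now();
  for (long i = 0; i < rep; ++i)
    C.noalias() += A * B;  // same kernel as gemm.cpp
  double secs = std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
  double flops = 2.0 * m * n * k;
  std::cout << 1e-9 * rep * flops / secs << " GFlop/s\n";
}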