summaryrefslogtreecommitdiffhomepage
path: root/eigen/unsupported/Eigen/CXX11/src
diff options
context:
space:
mode:
Diffstat (limited to 'eigen/unsupported/Eigen/CXX11/src')
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/README.md1760
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h527
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h299
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h181
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h1012
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h392
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h384
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h361
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h628
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h56
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h1391
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h467
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h1043
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h279
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h1104
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h212
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h313
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h68
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h337
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h81
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h122
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h282
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h236
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h428
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h181
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h633
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h288
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h371
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h651
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h389
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h169
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h109
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h489
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h185
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h33
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h79
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h509
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h725
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h229
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h82
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h253
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h209
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h54
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h323
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h218
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h888
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h397
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h269
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h276
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h781
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h750
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h242
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h429
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h288
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h287
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h264
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h146
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h338
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h82
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h121
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h239
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h204
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h177
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h114
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h181
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h70
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h237
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h272
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h248
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h608
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h293
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h236
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h338
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h669
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h233
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h274
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h210
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h154
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h38
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h22
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h33
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h20
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h542
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h88
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h267
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h311
-rw-r--r--eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h141
87 files changed, 0 insertions, 29919 deletions
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md b/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md
deleted file mode 100644
index da70fa2..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md
+++ /dev/null
@@ -1,1760 +0,0 @@
-# Eigen Tensors {#eigen_tensors}
-
-Tensors are multidimensional arrays of elements. Elements are typically scalars,
-but more complex types such as strings are also supported.
-
-[TOC]
-
-## Tensor Classes
-
-You can manipulate a tensor with one of the following classes. They are all in
-the namespace `::Eigen`.
-
-
-### Class Tensor<data_type, rank>
-
-This is the class to use to create a tensor and allocate memory for it. The
-class is templatized with the tensor datatype, such as float or int, and the
-tensor rank. The rank is the number of dimensions, for example rank 2 is a
-matrix.
-
-Tensors of this class are resizable. For example, if you assign a tensor of a
-different size to a Tensor, that tensor is resized to match its new value.
-
-#### Constructor `Tensor<data_type, rank>(size0, size1, ...)`
-
-Constructor for a Tensor. The constructor must be passed `rank` integers
-indicating the sizes of the instance along each of the `rank`
-dimensions.
-
- // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns
- // memory to hold 24 floating point values (24 = 2 x 3 x 4).
- Tensor<float, 3> t_3d(2, 3, 4);
-
- // Resize t_3d by assigning a tensor of different sizes, but same rank.
- t_3d = Tensor<float, 3>(3, 4, 3);
-
-#### Constructor `Tensor<data_type, rank>(size_array)`
-
-Constructor where the sizes for the constructor are specified as an array of
-values instead of an explicit list of parameters. The array type to use is
-`Eigen::array<Eigen::Index>`. The array can be constructed automatically
-from an initializer list.
-
- // Create a tensor of strings of rank 2 with sizes 5, 7.
- Tensor<string, 2> t_2d({5, 7});
-
-
-### Class `TensorFixedSize<data_type, Sizes<size0, size1, ...>>`
-
-Class to use for tensors of fixed size, where the size is known at compile
-time. Fixed sized tensors can provide very fast computations because all their
-dimensions are known by the compiler. FixedSize tensors are not resizable.
-
-If the total number of elements in a fixed size tensor is small enough the
-tensor data is held on the stack and does not cause heap allocation and free.
-
- // Create a 4 x 3 tensor of floats.
- TensorFixedSize<float, Sizes<4, 3>> t_4x3;
-
-### Class `TensorMap<Tensor<data_type, rank>>`
-
-This is the class to use to create a tensor on top of memory allocated and
-owned by another part of your code. It allows you to view any piece of allocated
-memory as a Tensor. Instances of this class do not own the memory where the
-data are stored.
-
-A TensorMap is not resizable because it does not own the memory where its data
-are stored.
-
-#### Constructor `TensorMap<Tensor<data_type, rank>>(data, size0, size1, ...)`
-
-Constructor for a TensorMap. The constructor must be passed a pointer to the
-storage for the data, and "rank" size attributes. The storage has to be
-large enough to hold all the data.
-
- // Map a tensor of ints on top of stack-allocated storage.
- int storage[128]; // 2 x 4 x 2 x 8 = 128
- TensorMap<Tensor<int, 4>> t_4d(storage, 2, 4, 2, 8);
-
- // The same storage can be viewed as a different tensor.
- // You can also pass the sizes as an array.
- TensorMap<Tensor<int, 2>> t_2d(storage, 16, 8);
-
- // You can also map fixed-size tensors. Here we get a 1d view of
- // the 2d fixed-size tensor.
- TensorFixedSize<float, Sizes<4, 3>> t_4x3;
- TensorMap<Tensor<float, 1>> t_12(t_4x3.data(), 12);
-
-
-#### Class `TensorRef`
-
-See Assigning to a TensorRef below.
-
-## Accessing Tensor Elements
-
-#### `<data_type> tensor(index0, index1...)`
-
-Return the element at position `(index0, index1...)` in tensor
-`tensor`. You must pass as many parameters as the rank of `tensor`.
-The expression can be used as an l-value to set the value of the element at the
-specified position. The value returned is of the datatype of the tensor.
-
- // Set the value of the element at position (0, 1, 0);
- Tensor<float, 3> t_3d(2, 3, 4);
- t_3d(0, 1, 0) = 12.0f;
-
- // Initialize all elements to random values.
- for (int i = 0; i < 2; ++i) {
- for (int j = 0; j < 3; ++j) {
- for (int k = 0; k < 4; ++k) {
- t_3d(i, j, k) = ...some random value...;
- }
- }
- }
-
- // Print elements of a tensor.
- for (int i = 0; i < 2; ++i) {
- LOG(INFO) << t_3d(i, 0, 0);
- }
-
-
-## TensorLayout
-
-The tensor library supports 2 layouts: `ColMajor` (the default) and
-`RowMajor`. Only the default column major layout is currently fully
-supported, and it is therefore not recommended to attempt to use the row major
-layout at the moment.
-
-The layout of a tensor is optionally specified as part of its type. If not
-specified explicitly column major is assumed.
-
- Tensor<float, 3, ColMajor> col_major; // equivalent to Tensor<float, 3>
- TensorMap<Tensor<float, 3, RowMajor> > row_major(data, ...);
-
-All the arguments to an expression must use the same layout. Attempting to mix
-different layouts will result in a compilation error.
-
-It is possible to change the layout of a tensor or an expression using the
-`swap_layout()` method. Note that this will also reverse the order of the
-dimensions.
-
- Tensor<float, 2, ColMajor> col_major(2, 4);
- Tensor<float, 2, RowMajor> row_major(2, 4);
-
- Tensor<float, 2> col_major_result = col_major; // ok, layouts match
- Tensor<float, 2> col_major_result = row_major; // will not compile
-
- // Simple layout swap
- col_major_result = row_major.swap_layout();
- eigen_assert(col_major_result.dimension(0) == 4);
- eigen_assert(col_major_result.dimension(1) == 2);
-
- // Swap the layout and preserve the order of the dimensions
- array<int, 2> shuffle(1, 0);
- col_major_result = row_major.swap_layout().shuffle(shuffle);
- eigen_assert(col_major_result.dimension(0) == 2);
- eigen_assert(col_major_result.dimension(1) == 4);
-
-
-## Tensor Operations
-
-The Eigen Tensor library provides a vast library of operations on Tensors:
-numerical operations such as addition and multiplication, geometry operations
-such as slicing and shuffling, etc. These operations are available as methods
-of the Tensor classes, and in some cases as operator overloads. For example
-the following code computes the elementwise addition of two tensors:
-
- Tensor<float, 3> t1(2, 3, 4);
- ...set some values in t1...
- Tensor<float, 3> t2(2, 3, 4);
- ...set some values in t2...
- // Set t3 to the element wise sum of t1 and t2
- Tensor<float, 3> t3 = t1 + t2;
-
-While the code above looks easy enough, it is important to understand that the
-expression `t1 + t2` is not actually adding the values of the tensors. The
-expression instead constructs a "tensor operator" object of the class
-TensorCwiseBinaryOp<scalar_sum>, which has references to the tensors
-`t1` and `t2`. This is a small C++ object that knows how to add
-`t1` and `t2`. It is only when the value of the expression is assigned
-to the tensor `t3` that the addition is actually performed. Technically,
-this happens through the overloading of `operator=()` in the Tensor class.
-
-This mechanism for computing tensor expressions allows for lazy evaluation and
-optimizations which are what make the tensor library very fast.
-
-Of course, the tensor operators do nest, and the expression `t1 + t2 * 0.3f`
-is actually represented with the (approximate) tree of operators:
-
- TensorCwiseBinaryOp<scalar_sum>(t1, TensorCwiseUnaryOp<scalar_mul>(t2, 0.3f))
-
-
-### Tensor Operations and C++ "auto"
-
-Because Tensor operations create tensor operators, the C++ `auto` keyword
-does not have its intuitive meaning. Consider these 2 lines of code:
-
- Tensor<float, 3> t3 = t1 + t2;
- auto t4 = t1 + t2;
-
-In the first line we allocate the tensor `t3` and it will contain the
-result of the addition of `t1` and `t2`. In the second line, `t4`
-is actually the tree of tensor operators that will compute the addition of
-`t1` and `t2`. In fact, `t4` is *not* a tensor and you cannot get
-the values of its elements:
-
- Tensor<float, 3> t3 = t1 + t2;
- cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0)
-
- auto t4 = t1 + t2;
- cout << t4(0, 0, 0); // Compilation error!
-
-When you use `auto` you do not get a Tensor as a result but instead a
-non-evaluated expression. So only use `auto` to delay evaluation.
-
-Unfortunately, there is no single underlying concrete type for holding
-non-evaluated expressions, hence you have to use auto in the case when you do
-want to hold non-evaluated expressions.
-
-When you need the results of a set of tensor computations you have to assign the
-result to a Tensor that will be capable of holding onto them. This can be
-either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing
-piece of memory. All the following will work:
-
- auto t4 = t1 + t2;
-
- Tensor<float, 3> result = t4; // Could also be: result(t4);
- cout << result(0, 0, 0);
-
- TensorMap<float, 4> result(<a float* with enough space>, <size0>, ...) = t4;
- cout << result(0, 0, 0);
-
- TensorFixedSize<float, Sizes<size0, ...>> result = t4;
- cout << result(0, 0, 0);
-
-Until you need the results, you can keep the operation around, and even reuse
-it for additional operations. As long as you keep the expression as an
-operation, no computation is performed.
-
- // One way to compute exp((t1 + t2) * 0.2f);
- auto t3 = t1 + t2;
- auto t4 = t3 * 0.2f;
- auto t5 = t4.exp();
- Tensor<float, 3> result = t5;
-
- // Another way, exactly as efficient as the previous one:
- Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
-
-### Controlling When Expressions are Evaluated
-
-There are several ways to control when expressions are evaluated:
-
-* Assignment to a Tensor, TensorFixedSize, or TensorMap.
-* Use of the eval() method.
-* Assignment to a TensorRef.
-
-#### Assigning to a Tensor, TensorFixedSize, or TensorMap.
-
-The most common way to evaluate an expression is to assign it to a Tensor. In
-the example below, the `auto` declarations make the intermediate values
-"Operations", not Tensors, and do not cause the expressions to be evaluated.
-The assignment to the Tensor `result` causes the evaluation of all the
-operations.
-
- auto t3 = t1 + t2; // t3 is an Operation.
- auto t4 = t3 * 0.2f; // t4 is an Operation.
- auto t5 = t4.exp(); // t5 is an Operation.
- Tensor<float, 3> result = t5; // The operations are evaluated.
-
-If you know the ranks and sizes of the Operation value you can assign the
-Operation to a TensorFixedSize instead of a Tensor, which is a bit more
-efficient.
-
- // We know that the result is a 4x4x2 tensor!
- TensorFixedSize<float, Sizes<4, 4, 2>> result = t5;
-
-Similarly, assigning an expression to a TensorMap causes its evaluation. Like
-tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to
-have the rank and sizes of the expressions that are assigned to them.
-
-#### Calling `eval()`.
-
-When you compute large composite expressions, you sometimes want to tell Eigen
-that an intermediate value in the expression tree is worth evaluating ahead of
-time. This is done by inserting a call to the `eval()` method of the
-expression Operation.
-
- // The previous example could have been written:
- Tensor<float, 3> result = ((t1 + t2) * 0.2f).exp();
-
- // If you want to compute (t1 + t2) once ahead of time you can write:
- Tensor<float, 3> result = ((t1 + t2).eval() * 0.2f).exp();
-
-Semantically, calling `eval()` is equivalent to materializing the value of
-the expression in a temporary Tensor of the right size. The code above in
-effect does:
-
- // .eval() knows the size!
- TensorFixedSize<float, Sizes<4, 4, 2>> tmp = t1 + t2;
- Tensor<float, 3> result = (tmp * 0.2f).exp();
-
-Note that the return value of `eval()` is itself an Operation, so the
-following code does not do what you may think:
-
- // Here t3 is an evaluation Operation. t3 has not been evaluated yet.
- auto t3 = (t1 + t2).eval();
-
- // You can use t3 in another expression. Still no evaluation.
- auto t4 = (t3 * 0.2f).exp();
-
- // The value is evaluated when you assign the Operation to a Tensor, using
- // an intermediate tensor to represent t3.
- Tensor<float, 3> result = t4;
-
-While in the examples above calling `eval()` does not make a difference in
-performance, in other cases it can make a huge difference. In the expression
-below the `broadcast()` expression causes the `X.maximum()` expression
-to be evaluated many times:
-
- Tensor<...> X ...;
- Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast))
- * beta).exp();
-
-Inserting a call to `eval()` between the `maximum()` and
-`reshape()` calls guarantees that maximum() is only computed once and
-greatly speeds up execution:
-
- Tensor<...> Y =
- ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast))
- * beta).exp();
-
-In the other example below, the tensor `Y` is both used in the expression
-and its assignment. This is an aliasing problem and if the evaluation is not
-done in the right order Y will be updated incrementally during the evaluation
-resulting in bogus results:
-
- Tensor<...> Y ...;
- Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast));
-
-Inserting a call to `eval()` between the `sum()` and `reshape()`
-expressions ensures that the sum is computed before any updates to `Y` are
-done.
-
- Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
-
-Note that an eval around the full right hand side expression is not needed
-because the generated code has to compute the i-th value of the right hand side
-before assigning it to the left hand side.
-
-However, if you were assigning the expression value to a shuffle of `Y`
-then you would need to force an eval for correctness by adding an `eval()`
-call for the right hand side:
-
- Y.shuffle(...) =
- (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval();
-
-
-#### Assigning to a `TensorRef`.
-
-If you need to access only a few elements from the value of an expression you
-can avoid materializing the value in a full tensor by using a TensorRef.
-
-A TensorRef is a small wrapper class for any Eigen Operation. It provides
-overloads for the `()` operator that let you access individual values in
-the expression. TensorRef is convenient, because the Operations themselves do
-not provide a way to access individual elements.
-
- // Create a TensorRef for the expression. The expression is not
- // evaluated yet.
- TensorRef<Tensor<float, 3> > ref = ((t1 + t2) * 0.2f).exp();
-
- // Use "ref" to access individual elements. The expression is evaluated
- // on the fly.
- float at_0 = ref(0, 0, 0);
- cout << ref(0, 1, 0);
-
-Only use TensorRef when you need a subset of the values of the expression.
-TensorRef only computes the values you access. However note that if you are
-going to access all the values it will be much faster to materialize the
-results in a Tensor first.
-
-In some cases, if the full Tensor result would be very large, you may save
-memory by accessing it as a TensorRef. But not always. So don't count on it.
-
-
-### Controlling How Expressions Are Evaluated
-
-The tensor library provides several implementations of the various operations
-such as contractions and convolutions. The implementations are optimized for
-different environments: single threaded on CPU, multi threaded on CPU, or on a
-GPU using cuda. Additional implementations may be added later.
-
-You can choose which implementation to use with the `device()` call. If
-you do not choose an implementation explicitly the default implementation that
-uses a single thread on the CPU is used.
-
-The default implementation has been optimized for recent Intel CPUs, taking
-advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the
-library on ARM CPUs. Note that you need to pass compiler-dependent flags
-to enable the use of SSE, AVX, and other instructions.
-
-For example, the following code adds two tensors using the default
-single-threaded CPU implementation:
-
- Tensor<float, 2> a(30, 40);
- Tensor<float, 2> b(30, 40);
- Tensor<float, 2> c = a + b;
-
-To choose a different implementation you have to insert a `device()` call
-before the assignment of the result. For technical C++ reasons this requires
-that the Tensor for the result be declared on its own. This means that you
-have to know the size of the result.
-
- Eigen::Tensor<float, 2> c(30, 40);
- c.device(...) = a + b;
-
-The call to `device()` must be the last call on the left of the operator=.
-
-You must pass to the `device()` call an Eigen device object. There are
-presently three devices you can use: DefaultDevice, ThreadPoolDevice and
-GpuDevice.
-
-
-#### Evaluating With the DefaultDevice
-
-This is exactly the same as not inserting a `device()` call.
-
- DefaultDevice my_device;
- c.device(my_device) = a + b;
-
-#### Evaluating with a Thread Pool
-
- // Create the Eigen ThreadPoolDevice.
- Eigen::ThreadPoolDevice my_device(4 /* number of threads to use */);
-
- // Now just use the device when evaluating expressions.
- Eigen::Tensor<float, 2> c(30, 50);
- c.device(my_device) = a.contract(b, dot_product_dims);
-
-
-#### Evaluating On GPU
-
-This is presently a bit more complicated than just using a thread pool device.
-You need to create a GPU device but you also need to explicitly allocate the
-memory for tensors with cuda.
-
-
-## API Reference
-
-### Datatypes
-
-In the documentation of the tensor methods and Operation we mention datatypes
-that are tensor-type specific:
-
-#### `<Tensor-Type>::``Dimensions`
-
-Acts like an array of ints. Has an `int size` attribute, and can be
-indexed like an array to access individual values. Used to represent the
-dimensions of a tensor. See `dimensions()`.
-
-#### `<Tensor-Type>::``Index`
-
-Acts like an `int`. Used for indexing tensors along their dimensions. See
-`operator()`, `dimension()`, and `size()`.
-
-#### `<Tensor-Type>::``Scalar`
-
-Represents the datatype of individual tensor elements. For example, for a
-`Tensor<float>`, `Scalar` is the type `float`. See
-`setConstant()`.
-
-#### `<Operation>`
-
-We use this pseudo type to indicate that a tensor Operation is returned by a
-method. We indicate in the text the type and dimensions of the tensor that the
-Operation returns after evaluation.
-
-The Operation will have to be evaluated, for example by assigning it to a
-tensor, before you can access the values of the resulting tensor. You can also
-access the values through a TensorRef.
-
-
-## Built-in Tensor Methods
-
-These are usual C++ methods that act on tensors immediately. They are not
-Operations which provide delayed evaluation of their results. Unless specified
-otherwise, all the methods listed below are available on all tensor classes:
-Tensor, TensorFixedSize, and TensorMap.
-
-## Metadata
-
-### `int NumDimensions`
-
-Constant value indicating the number of dimensions of a Tensor. This is also
-known as the tensor "rank".
-
- Eigen::Tensor<float, 2> a(3, 4);
- cout << "Dims " << a.NumDimensions;
- => Dims 2
-
-### `Dimensions dimensions()`
-
-Returns an array-like object representing the dimensions of the tensor.
-The actual type of the `dimensions()` result is `<Tensor-Type>::``Dimensions`.
-
- Eigen::Tensor<float, 2> a(3, 4);
- const Eigen::Tensor<float, 2>::Dimensions& d = a.dimensions();
- cout << "Dim size: " << d.size << ", dim 0: " << d[0]
- << ", dim 1: " << d[1];
- => Dim size: 2, dim 0: 3, dim 1: 4
-
-If you use a C++11 compiler, you can use `auto` to simplify the code:
-
- const auto& d = a.dimensions();
- cout << "Dim size: " << d.size << ", dim 0: " << d[0]
- << ", dim 1: " << d[1];
- => Dim size: 2, dim 0: 3, dim 1: 4
-
-### `Index dimension(Index n)`
-
-Returns the n-th dimension of the tensor. The actual type of the
-`dimension()` result is `<Tensor-Type>::``Index`, but you can
-always use it like an int.
-
- Eigen::Tensor<float, 2> a(3, 4);
- int dim1 = a.dimension(1);
- cout << "Dim 1: " << dim1;
- => Dim 1: 4
-
-### `Index size()`
-
-Returns the total number of elements in the tensor. This is the product of all
-the tensor dimensions. The actual type of the `size()` result is
-`<Tensor-Type>::``Index`, but you can always use it like an int.
-
- Eigen::Tensor<float, 2> a(3, 4);
- cout << "Size: " << a.size();
- => Size: 12
-
-
-### Getting Dimensions From An Operation
-
-A few operations provide `dimensions()` directly,
-e.g. `TensorReslicingOp`. Most operations defer calculating dimensions
-until the operation is being evaluated. If you need access to the dimensions
-of a deferred operation, you can wrap it in a TensorRef (see Assigning to a
-TensorRef above), which provides `dimensions()` and `dimension()` as
-above.
-
-TensorRef can also wrap the plain Tensor types, so this is a useful idiom in
-templated contexts where the underlying object could be either a raw Tensor
-or some deferred operation (e.g. a slice of a Tensor). In this case, the
-template code can wrap the object in a TensorRef and reason about its
-dimensionality while remaining agnostic to the underlying type.
-
-
-## Constructors
-
-### Tensor
-
-Creates a tensor of the specified size. The number of arguments must be equal
-to the rank of the tensor. The content of the tensor is not initialized.
-
- Eigen::Tensor<float, 2> a(3, 4);
- cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
- => NumRows: 3 NumCols: 4
-
-### TensorFixedSize
-
-Creates a tensor of the specified size. The number of arguments in the Sizes<>
-template parameter determines the rank of the tensor. The content of the tensor
-is not initialized.
-
- Eigen::TensorFixedSize<float, Sizes<3, 4>> a;
- cout << "Rank: " << a.rank() << endl;
- => Rank: 2
- cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
- => NumRows: 3 NumCols: 4
-
-### TensorMap
-
-Creates a tensor mapping an existing array of data. The data must not be freed
-until the TensorMap is discarded, and the size of the data must be large enough
-to accommodate the coefficients of the tensor.
-
- float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
- Eigen::TensorMap<Tensor<float, 2>> a(data, 3, 4);
- cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl;
- => NumRows: 3 NumCols: 4
- cout << "a(1, 2): " << a(1, 2) << endl;
- => a(1, 2): 7
-
-
-## Contents Initialization
-
-When a new Tensor or a new TensorFixedSize is created, memory is allocated to
-hold all the tensor elements, but the memory is not initialized. Similarly,
-when a new TensorMap is created on top of non-initialized memory, its
-contents are not initialized.
-
-You can use one of the methods below to initialize the tensor memory. These
-have an immediate effect on the tensor and return the tensor itself as a
-result. These are not tensor Operations which delay evaluation.
-
-### `<Tensor-Type> setConstant(const Scalar& val)`
-
-Sets all elements of the tensor to the constant value `val`. `Scalar`
-is the type of data stored in the tensor. You can pass any value that is
-convertible to that type.
-
-Returns the tensor itself in case you want to chain another call.
-
- a.setConstant(12.3f);
- cout << "Constant: " << endl << a << endl << endl;
- =>
- Constant:
- 12.3 12.3 12.3 12.3
- 12.3 12.3 12.3 12.3
- 12.3 12.3 12.3 12.3
-
-Note that `setConstant()` can be used on any tensor where the element type
-has a copy constructor and an `operator=()`:
-
- Eigen::Tensor<string, 2> a(2, 3);
- a.setConstant("yolo");
- cout << "String tensor: " << endl << a << endl << endl;
- =>
- String tensor:
- yolo yolo yolo
- yolo yolo yolo
-
-
-### `<Tensor-Type> setZero()`
-
-Fills the tensor with zeros. Equivalent to `setConstant(Scalar(0))`.
-Returns the tensor itself in case you want to chain another call.
-
- a.setZero();
- cout << "Zeros: " << endl << a << endl << endl;
- =>
- Zeros:
- 0 0 0 0
- 0 0 0 0
- 0 0 0 0
-
-
-### `<Tensor-Type> setValues({..initializer_list})`
-
-Fills the tensor with explicit values specified in a std::initializer_list.
-The type of the initializer list depends on the type and rank of the tensor.
-
-If the tensor has rank N, the initializer list must be nested N times. The
-most deeply nested lists must contain P scalars of the Tensor type where P is
-the size of the last dimension of the Tensor.
-
-For example, for a `TensorFixedSize<float, Sizes<2, 3>>` the initializer list must
-contain 2 lists of 3 floats each.
-
-`setValues()` returns the tensor itself in case you want to chain another
-call.
-
- Eigen::Tensor<float, 2> a(2, 3);
- a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}});
- cout << "a" << endl << a << endl << endl;
- =>
- a
- 0 1 2
- 3 4 5
-
-If a list is too short, the corresponding elements of the tensor will not be
-changed. This is valid at each level of nesting. For example the following
-code only sets the values of the first row of the tensor.
-
- Eigen::Tensor<int, 2> a(2, 3);
- a.setConstant(1000);
- a.setValues({{10, 20, 30}});
- cout << "a" << endl << a << endl << endl;
- =>
- a
- 10 20 30
- 1000 1000 1000
-
-### `<Tensor-Type> setRandom()`
-
-Fills the tensor with random values. Returns the tensor itself in case you
-want to chain another call.
-
- a.setRandom();
- cout << "Random: " << endl << a << endl << endl;
- =>
- Random:
- 0.680375 0.59688 -0.329554 0.10794
- -0.211234 0.823295 0.536459 -0.0452059
- 0.566198 -0.604897 -0.444451 0.257742
-
-You can customize `setRandom()` by providing your own random number
-generator as a template argument:
-
- a.setRandom<MyRandomGenerator>();
-
-Here, `MyRandomGenerator` must be a struct with the following member
-functions, where Scalar and Index are the same as `<Tensor-Type>::Scalar`
-and `<Tensor-Type>::Index`.
-
-See `struct UniformRandomGenerator` in TensorFunctors.h for an example.
-
- // Custom number generator for use with setRandom().
- struct MyRandomGenerator {
- // Default and copy constructors. Both are needed
- MyRandomGenerator() { }
- MyRandomGenerator(const MyRandomGenerator& ) { }
-
- // Return a random value to be used. "element_location" is the
- // location of the entry to set in the tensor, it can typically
- // be ignored.
- Scalar operator()(Eigen::DenseIndex element_location,
- Eigen::DenseIndex /*unused*/ = 0) const {
- return <randomly generated value of type Scalar>;
- }
-
- // Same as above but generates several numbers at a time.
- typename internal::packet_traits<Scalar>::type packetOp(
- Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
- return <a packet of randomly generated values>;
- }
- };
-
-You can also use one of the 2 random number generators that are part of the
-tensor library:
-* UniformRandomGenerator
-* NormalRandomGenerator
-
-
-## Data Access
-
-The Tensor, TensorFixedSize, and TensorRef classes provide the following
-accessors to access the tensor coefficients:
-
- const Scalar& operator()(const array<Index, NumIndices>& indices)
- const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
- Scalar& operator()(const array<Index, NumIndices>& indices)
- Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
-
-The number of indices must be equal to the rank of the tensor. Moreover, these
-accessors are not available on tensor expressions. In order to access the
-values of a tensor expression, the expression must either be evaluated or
-wrapped in a TensorRef.
-
-
-### `Scalar* data()` and `const Scalar* data() const`
-
-Returns a pointer to the storage for the tensor. The pointer is const if the
-tensor was const. This allows direct access to the data. The layout of the
-data depends on the tensor layout: RowMajor or ColMajor.
-
-This access is usually only needed for special cases, for example when mixing
-Eigen Tensor code with other libraries.
-
-Scalar is the type of data stored in the tensor.
-
- Eigen::Tensor<float, 2> a(3, 4);
- float* a_data = a.data();
- a_data[0] = 123.45f;
- cout << "a(0, 0): " << a(0, 0);
- => a(0, 0): 123.45
-
-
-## Tensor Operations
-
-All the methods documented below return non evaluated tensor `Operations`.
-These can be chained: you can apply another Tensor Operation to the value
-returned by the method.
-
-The chain of Operation is evaluated lazily, typically when it is assigned to a
-tensor. See "Controlling when Expression are Evaluated" for more details about
-their evaluation.
-
-### `<Operation> constant(const Scalar& val)`
-
-Returns a tensor of the same type and dimensions as the original tensor but
-where all elements have the value `val`.
-
-This is useful, for example, when you want to add or subtract a constant from a
-tensor, or multiply every element of a tensor by a scalar.
-
- Eigen::Tensor<float, 2> a(2, 3);
- a.setConstant(1.0f);
- Eigen::Tensor<float, 2> b = a + a.constant(2.0f);
- Eigen::Tensor<float, 2> c = b * b.constant(0.2f);
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- cout << "c" << endl << c << endl << endl;
- =>
- a
- 1 1 1
- 1 1 1
-
- b
- 3 3 3
- 3 3 3
-
- c
- 0.6 0.6 0.6
- 0.6 0.6 0.6
-
-### `<Operation> random()`
-
-Returns a tensor of the same type and dimensions as the current tensor
-but where all elements have random values.
-
-This is for example useful to add random values to an existing tensor.
-The generation of random values can be customized in the same manner
-as for `setRandom()`.
-
- Eigen::Tensor<float, 2> a(2, 3);
- a.setConstant(1.0f);
- Eigen::Tensor<float, 2> b = a + a.random();
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 1 1 1
- 1 1 1
-
- b
- 1.68038 1.5662 1.82329
- 0.788766 1.59688 0.395103
-
-
-## Unary Element Wise Operations
-
-All these operations take a single input tensor as argument and return a tensor
-of the same type and dimensions as the tensor to which they are applied. The
-requested operations are applied to each element independently.
-
-### `<Operation> operator-()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the opposite values of the original tensor.
-
- Eigen::Tensor<float, 2> a(2, 3);
- a.setConstant(1.0f);
- Eigen::Tensor<float, 2> b = -a;
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 1 1 1
- 1 1 1
-
- b
- -1 -1 -1
- -1 -1 -1
-
-### `<Operation> sqrt()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the square roots of the original tensor.
-
-### `<Operation> rsqrt()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the inverse square roots of the original tensor.
-
-### `<Operation> square()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the squares of the original tensor values.
-
-### `<Operation> inverse()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the inverse of the original tensor values.
-
-### `<Operation> exp()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the exponential of the original tensor.
-
-### `<Operation> log()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the natural logarithms of the original tensor.
-
-### `<Operation> abs()`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the absolute values of the original tensor.
-
-### `<Operation> pow(Scalar exponent)`
-
-Returns a tensor of the same type and dimensions as the original tensor
-containing the coefficients of the original tensor to the power of the
-exponent.
-
-The type of the exponent, Scalar, is always the same as the type of the
-tensor coefficients. For example, only integer exponents can be used in
-conjunction with tensors of integer values.
-
-You can use cast() to lift this restriction. For example this computes
-cubic roots of an int Tensor:
-
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{0, 1, 8}, {27, 64, 125}});
- Eigen::Tensor<double, 2> b = a.cast<double>().pow(1.0 / 3.0);
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 0 1 8
- 27 64 125
-
- b
- 0 1 2
- 3 4 5
-
-### `<Operation> operator * (Scalar scale)`
-
-Multiplies all the coefficients of the input tensor by the provided scale.
-
-### `<Operation> cwiseMax(Scalar threshold)`
-TODO
-
-### `<Operation> cwiseMin(Scalar threshold)`
-TODO
-
-### `<Operation> unaryExpr(const CustomUnaryOp& func)`
-TODO
-
-
-## Binary Element Wise Operations
-
-These operations take two input tensors as arguments. The 2 input tensors should
-be of the same type and dimensions. The result is a tensor of the same
-dimensions as the tensors to which they are applied, and unless otherwise
-specified it is also of the same type. The requested operations are applied to
-each pair of elements independently.
-
-### `<Operation> operator+(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise sums of the inputs.
-
-### `<Operation> operator-(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise differences of the inputs.
-
-### `<Operation> operator*(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise products of the inputs.
-
-### `<Operation> operator/(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise quotients of the inputs.
-
-This operator is not supported for integer types.
-
-### `<Operation> cwiseMax(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise maximums of the inputs.
-
-### `<Operation> cwiseMin(const OtherDerived& other)`
-
-Returns a tensor of the same type and dimensions as the input tensors
-containing the coefficient wise minimums of the inputs.
-
-### `<Operation> Logical operators`
-
-The following logical operators are supported as well:
-
-* operator&&(const OtherDerived& other)
-* operator||(const OtherDerived& other)
-* operator<(const OtherDerived& other)
-* operator<=(const OtherDerived& other)
-* operator>(const OtherDerived& other)
-* operator>=(const OtherDerived& other)
-* operator==(const OtherDerived& other)
-* operator!=(const OtherDerived& other)
-
-They all return a tensor of boolean values.
-
-
-## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor))
-
-Selection is a coefficient-wise ternary operator that is the tensor equivalent
-to the if-then-else operation.
-
- Tensor<bool, 3> if = ...;
- Tensor<float, 3> then = ...;
- Tensor<float, 3> else = ...;
- Tensor<float, 3> result = if.select(then, else);
-
-The 3 arguments must be of the same dimensions, which will also be the dimension
-of the result. The 'if' tensor must be of type boolean, the 'then' and the
-'else' tensor must be of the same type, which will also be the type of the
-result.
-
-Each coefficient in the result is equal to the corresponding coefficient in the
-'then' tensor if the corresponding value in the 'if' tensor is true. If not, the
-resulting coefficient will come from the 'else' tensor.
-
-
-## Contraction
-
-Tensor *contractions* are a generalization of the matrix product to the
-multidimensional case.
-
- // Create 2 matrices using tensors of rank 2
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{1, 2, 3}, {6, 5, 4}});
- Eigen::Tensor<int, 2> b(3, 2);
- b.setValues({{1, 2}, {4, 5}, {5, 6}});
-
- // Compute the traditional matrix product
- Eigen::array<Eigen::IndexPair<int>, 1> product_dims = { Eigen::IndexPair<int>(1, 0) };
- Eigen::Tensor<int, 2> AB = a.contract(b, product_dims);
-
- // Compute the product of the transpose of the matrices
- Eigen::array<Eigen::IndexPair<int>, 1> transposed_product_dims = { Eigen::IndexPair<int>(0, 1) };
- Eigen::Tensor<int, 2> AtBt = a.contract(b, transposed_product_dims);
-
- // Contraction to scalar value using a double contraction.
- // First coordinate of both tensors are contracted as well as both second coordinates, i.e., this computes the sum of the squares of the elements.
- Eigen::array<Eigen::IndexPair<int>, 2> double_contraction_product_dims = { Eigen::IndexPair<int>(0, 0), Eigen::IndexPair<int>(1, 1) };
- Eigen::Tensor<int, 0> AdoubleContractedA = a.contract(a, double_contraction_product_dims);
-
- // Extracting the scalar value of the tensor contraction for further usage
- int value = AdoubleContractedA(0);
-
-## Reduction Operations
-
-A *Reduction* operation returns a tensor with fewer dimensions than the
-original tensor. The values in the returned tensor are computed by applying a
-*reduction operator* to slices of values from the original tensor. You specify
-the dimensions along which the slices are made.
-
-The Eigen Tensor library provides a set of predefined reduction operators such
-as `maximum()` and `sum()` and lets you define additional operators by
-implementing a few methods from a reductor template.
-
-### Reduction Dimensions
-
-All reduction operations take a single parameter of type
-`<TensorType>::Dimensions` which can always be specified as an array of
-ints. These are called the "reduction dimensions." The values are the indices
-of the dimensions of the input tensor over which the reduction is done. The
-parameter can have at most as many elements as the rank of the input tensor;
-each element must be less than the tensor rank, as it indicates one of the
-dimensions to reduce.
-
-Each dimension of the input tensor should occur at most once in the reduction
-dimensions as the implementation does not remove duplicates.
-
-The order of the values in the reduction dimensions does not affect the
-results, but the code may execute faster if you list the dimensions in
-increasing order.
-
-Example: Reduction along one dimension.
-
- // Create a tensor of 2 dimensions
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{1, 2, 3}, {6, 5, 4}});
- // Reduce it along the second dimension (1)...
- Eigen::array<int, 1> dims({1 /* dimension to reduce */});
- // ...using the "maximum" operator.
- // The result is a tensor with one dimension. The size of
- // that dimension is the same as the first (non-reduced) dimension of a.
- Eigen::Tensor<int, 1> b = a.maximum(dims);
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 1 2 3
- 6 5 4
-
- b
- 3
- 6
-
-Example: Reduction along two dimensions.
-
- Eigen::Tensor<float, 3, Eigen::ColMajor> a(2, 3, 4);
- a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
- {7.0f, 6.0f, 5.0f, 4.0f},
- {8.0f, 9.0f, 10.0f, 11.0f}},
- {{12.0f, 13.0f, 14.0f, 15.0f},
- {19.0f, 18.0f, 17.0f, 16.0f},
- {20.0f, 21.0f, 22.0f, 23.0f}}});
- // The tensor a has 3 dimensions. We reduce along the
- // first 2, resulting in a tensor with a single dimension
- // of size 4 (the last dimension of a.)
- // Note that we pass the array of reduction dimensions
- // directly to the maximum() call.
- Eigen::Tensor<float, 1, Eigen::ColMajor> b =
- a.maximum(Eigen::array<int, 2>({0, 1}));
- cout << "b" << endl << b << endl << endl;
- =>
- b
- 20
- 21
- 22
- 23
-
-#### Reduction along all dimensions
-
-As a special case, if you pass no parameter to a reduction operation the
-original tensor is reduced along *all* its dimensions. The result is a
-scalar, represented as a zero-dimension tensor.
-
- Eigen::Tensor<float, 3> a(2, 3, 4);
- a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f},
- {7.0f, 6.0f, 5.0f, 4.0f},
- {8.0f, 9.0f, 10.0f, 11.0f}},
- {{12.0f, 13.0f, 14.0f, 15.0f},
- {19.0f, 18.0f, 17.0f, 16.0f},
- {20.0f, 21.0f, 22.0f, 23.0f}}});
- // Reduce along all dimensions using the sum() operator.
- Eigen::Tensor<float, 0> b = a.sum();
- cout << "b" << endl << b << endl << endl;
- =>
- b
- 276
-
-
-### `<Operation> sum(const Dimensions& new_dims)`
-### `<Operation> sum()`
-
-Reduce a tensor using the sum() operator. The resulting values
-are the sum of the reduced values.
-
-### `<Operation> mean(const Dimensions& new_dims)`
-### `<Operation> mean()`
-
-Reduce a tensor using the mean() operator. The resulting values
-are the mean of the reduced values.
-
-### `<Operation> maximum(const Dimensions& new_dims)`
-### `<Operation> maximum()`
-
-Reduce a tensor using the maximum() operator. The resulting values are the
-largest of the reduced values.
-
-### `<Operation> minimum(const Dimensions& new_dims)`
-### `<Operation> minimum()`
-
-Reduce a tensor using the minimum() operator. The resulting values
-are the smallest of the reduced values.
-
-### `<Operation> prod(const Dimensions& new_dims)`
-### `<Operation> prod()`
-
-Reduce a tensor using the prod() operator. The resulting values
-are the product of the reduced values.
-
-### `<Operation> all(const Dimensions& new_dims)`
-### `<Operation> all()`
-Reduce a tensor using the all() operator. Casts tensor to bool and then checks
-whether all elements are true. Runs through all elements rather than
-short-circuiting, so may be significantly inefficient.
-
-### `<Operation> any(const Dimensions& new_dims)`
-### `<Operation> any()`
-Reduce a tensor using the any() operator. Casts tensor to bool and then checks
-whether any element is true. Runs through all elements rather than
-short-circuiting, so may be significantly inefficient.
-
-
-### `<Operation> reduce(const Dimensions& new_dims, const Reducer& reducer)`
-
-Reduce a tensor using a user-defined reduction operator. See `SumReducer`
-in TensorFunctors.h for information on how to implement a reduction operator.
-
-
-## Scan Operations
-
-A *Scan* operation returns a tensor with the same dimensions as the original
-tensor. The operation performs an inclusive scan along the specified
-axis, which means it computes a running total along the axis for a given
-reduction operation.
-If the reduction operation corresponds to summation, then this computes the
-prefix sum of the tensor along the given axis.
-
-Example:
-
- // Create a tensor of 2 dimensions
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{1, 2, 3}, {4, 5, 6}});
- // Scan it along the second dimension (1) using summation
- Eigen::Tensor<int, 2> b = a.cumsum(1);
- // The result is a tensor with the same size as the input
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 1 2 3
- 4 5 6
-
- b
- 1 3 6
- 4 9 15
-
-### `<Operation> cumsum(const Index& axis)`
-
-Perform a scan by summing consecutive entries.
-
-### `<Operation> cumprod(const Index& axis)`
-
-Perform a scan by multiplying consecutive entries.
-
-
-## Convolutions
-
-### `<Operation> convolve(const Kernel& kernel, const Dimensions& dims)`
-
-Returns a tensor that is the output of the convolution of the input tensor with the kernel,
-along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor
-which were part of the convolution will be reduced by the formula:
-output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size).
-The dimension sizes for dimensions that were not part of the convolution will remain the same.
-Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the
-convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is
-for the last dimension).
-
- // Compute convolution along the second and third dimension.
- Tensor<float, 4, DataLayout> input(3, 3, 7, 11);
- Tensor<float, 2, DataLayout> kernel(2, 2);
- Tensor<float, 4, DataLayout> output(3, 2, 6, 11);
- input.setRandom();
- kernel.setRandom();
-
- Eigen::array<ptrdiff_t, 2> dims({1, 2}); // Specify second and third dimension for convolution.
- output = input.convolve(kernel, dims);
-
- for (int i = 0; i < 3; ++i) {
- for (int j = 0; j < 2; ++j) {
- for (int k = 0; k < 6; ++k) {
- for (int l = 0; l < 11; ++l) {
- const float result = output(i,j,k,l);
- const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
- input(i,j+1,k+0,l) * kernel(1,0) +
- input(i,j+0,k+1,l) * kernel(0,1) +
- input(i,j+1,k+1,l) * kernel(1,1);
- VERIFY_IS_APPROX(result, expected);
- }
- }
- }
- }
-
-
-## Geometrical Operations
-
-These operations return a Tensor with different dimensions than the original
-Tensor. They can be used to access slices of tensors, see them with different
-dimensions, or pad tensors with additional data.
-
-### `<Operation> reshape(const Dimensions& new_dims)`
-
-Returns a view of the input tensor that has been reshaped to the specified
-new dimensions. The argument new_dims is an array of Index values. The
-rank of the resulting tensor is equal to the number of elements in new_dims.
-
-The product of all the sizes in the new dimension array must be equal to
-the number of elements in the input tensor.
-
- // Increase the rank of the input tensor by introducing a new dimension
- // of size 1.
- Tensor<float, 2> input(7, 11);
- array<int, 3> three_dims{{7, 11, 1}};
- Tensor<float, 3> result = input.reshape(three_dims);
-
- // Decrease the rank of the input tensor by merging 2 dimensions;
- array<int, 1> one_dim{{7 * 11}};
- Tensor<float, 1> result = input.reshape(one_dim);
-
-This operation does not move any data in the input tensor, so the resulting
-contents of a reshaped Tensor depend on the data layout of the original Tensor.
-
-For example this is what happens when you `reshape()` a 2D ColMajor tensor
-to one dimension:
-
- Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3);
- a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
- Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2});
- Eigen::Tensor<float, 1, Eigen::ColMajor> b = a.reshape(one_dim);
- cout << "b" << endl << b << endl;
- =>
- b
- 0
- 300
- 100
- 400
- 200
- 500
-
-This is what happens when the 2D Tensor is RowMajor:
-
- Eigen::Tensor<float, 2, Eigen::RowMajor> a(2, 3);
- a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
- Eigen::array<Eigen::DenseIndex, 1> one_dim({3 * 2});
- Eigen::Tensor<float, 1, Eigen::RowMajor> b = a.reshape(one_dim);
- cout << "b" << endl << b << endl;
- =>
- b
- 0
- 100
- 200
- 300
- 400
- 500
-
-The reshape operation is an lvalue. In other words, it can be used on the left
-side of the assignment operator.
-
-The previous example can be rewritten as follows:
-
- Eigen::Tensor<float, 2, Eigen::ColMajor> a(2, 3);
- a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}});
- Eigen::array<Eigen::DenseIndex, 2> two_dim({2, 3});
- Eigen::Tensor<float, 1, Eigen::ColMajor> b(6);
- b.reshape(two_dim) = a;
- cout << "b" << endl << b << endl;
- =>
- b
- 0
- 300
- 100
- 400
- 200
- 500
-
-Note that "b" itself was not reshaped but that instead the assignment is done to
-the reshape view of b.
-
-
-### `<Operation> shuffle(const Shuffle& shuffle)`
-
-Returns a copy of the input tensor whose dimensions have been
-reordered according to the specified permutation. The argument shuffle
-is an array of Index values. Its size is the rank of the input
-tensor. It must contain a permutation of 0, 1, ..., rank - 1. The size of
-the i-th dimension of the output tensor equals the size of the shuffle[i]-th
-dimension of the input tensor. For example:
-
- // Shuffle all dimensions to the left by 1.
- Tensor<float, 3> input(20, 30, 50);
- // ... set some values in input.
- Tensor<float, 3> output = input.shuffle({1, 2, 0});
-
- eigen_assert(output.dimension(0) == 30);
- eigen_assert(output.dimension(1) == 50);
- eigen_assert(output.dimension(2) == 20);
-
-Indices into the output tensor are shuffled accordingly to formulate
-indices into the input tensor. For example, one can assert in the above
-code snippet that:
-
- eigen_assert(output(3, 7, 11) == input(11, 3, 7));
-
-In general, one can assert that
-
- eigen_assert(output(..., indices[shuffle[i]], ...) ==
- input(..., indices[i], ...))
-
-The shuffle operation results in an lvalue, which means that it can be assigned
-to. In other words, it can be used on the left side of the assignment operator.
-
-Let's rewrite the previous example to take advantage of this feature:
-
- // Shuffle all dimensions to the left by 1.
- Tensor<float, 3> input(20, 30, 50);
- // ... set some values in input.
- Tensor<float, 3> output(30, 50, 20);
- output.shuffle({2, 0, 1}) = input;
-
-
-### `<Operation> stride(const Strides& strides)`
-
-Returns a view of the input tensor that strides (skips stride-1
-elements) along each of the dimensions. The argument strides is an
-array of Index values. The dimensions of the resulting tensor are
-ceil(input_dimensions[i] / strides[i]).
-
-For example this is what happens when you `stride()` a 2D tensor:
-
- Eigen::Tensor<int, 2> a(4, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}});
- Eigen::array<Eigen::DenseIndex, 2> strides({3, 2});
- Eigen::Tensor<int, 2> b = a.stride(strides);
- cout << "b" << endl << b << endl;
- =>
- b
- 0 200
- 900 1100
-
-It is possible to assign a tensor to a stride:
-
- Tensor<float, 3> input(20, 30, 50);
- // ... set some values in input.
- Tensor<float, 3> output(40, 90, 200);
- output.stride({2, 3, 4}) = input;
-
-
-### `<Operation> slice(const StartIndices& offsets, const Sizes& extents)`
-
-Returns a sub-tensor of the given tensor. For each dimension i, the slice is
-made of the coefficients stored between offsets[i] and offsets[i] + extents[i]
-in the input tensor.
-
- Eigen::Tensor<int, 2> a(4, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500},
- {600, 700, 800}, {900, 1000, 1100}});
- Eigen::array<int, 2> offsets = {1, 0};
- Eigen::array<int, 2> extents = {2, 2};
- Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
- cout << "a" << endl << a << endl;
- =>
- a
- 0 100 200
- 300 400 500
- 600 700 800
- 900 1000 1100
- cout << "slice" << endl << slice << endl;
- =>
- slice
- 300 400
- 600 700
-
-
-### `<Operation> chip(const Index offset, const Index dim)`
-
-A chip is a special kind of slice. It is the subtensor at the given offset in
-the dimension dim. The returned tensor has one fewer dimension than the input
-tensor: the dimension dim is removed.
-
-For example, a matrix chip would be either a row or a column of the input
-matrix.
-
- Eigen::Tensor<int, 2> a(4, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500},
- {600, 700, 800}, {900, 1000, 1100}});
- Eigen::Tensor<int, 1> row_3 = a.chip(2, 0);
- Eigen::Tensor<int, 1> col_2 = a.chip(1, 1);
- cout << "a" << endl << a << endl;
- =>
- a
- 0 100 200
- 300 400 500
- 600 700 800
- 900 1000 1100
- cout << "row_3" << endl << row_3 << endl;
- =>
- row_3
- 600 700 800
- cout << "col_2" << endl << col_2 << endl;
- =>
- col_2
- 100 400 700 1000
-
-It is possible to assign values to a tensor chip since the chip operation is an
-lvalue. For example:
-
- Eigen::Tensor<int, 1> a(3);
- a.setValues({{100, 200, 300}});
- Eigen::Tensor<int, 2> b(2, 3);
- b.setZero();
- b.chip(0, 0) = a;
- cout << "a" << endl << a << endl;
- =>
- a
- 100
- 200
- 300
- cout << "b" << endl << b << endl;
- =>
- b
- 100 200 300
- 0 0 0
-
-
-### `<Operation> reverse(const ReverseDimensions& reverse)`
-
-Returns a view of the input tensor that reverses the order of the coefficients
-along a subset of the dimensions. The argument reverse is an array of boolean
-values that indicates whether or not the order of the coefficients should be
-reversed along each of the dimensions. This operation preserves the dimensions
-of the input tensor.
-
-For example this is what happens when you `reverse()` the first dimension
-of a 2D tensor:
-
- Eigen::Tensor<int, 2> a(4, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500},
- {600, 700, 800}, {900, 1000, 1100}});
- Eigen::array<bool, 2> reverse({true, false});
- Eigen::Tensor<int, 2> b = a.reverse(reverse);
- cout << "a" << endl << a << endl << "b" << endl << b << endl;
- =>
- a
- 0 100 200
- 300 400 500
- 600 700 800
- 900 1000 1100
- b
- 900 1000 1100
- 600 700 800
- 300 400 500
- 0 100 200
-
-
-### `<Operation> broadcast(const Broadcast& broadcast)`
-
-Returns a view of the input tensor in which the input is replicated one to many
-times.
-The broadcast argument specifies how many copies of the input tensor need to be
-made in each of the dimensions.
-
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500}});
- Eigen::array<int, 2> bcast({3, 2});
- Eigen::Tensor<int, 2> b = a.broadcast(bcast);
- cout << "a" << endl << a << endl << "b" << endl << b << endl;
- =>
- a
- 0 100 200
- 300 400 500
- b
- 0 100 200 0 100 200
- 300 400 500 300 400 500
- 0 100 200 0 100 200
- 300 400 500 300 400 500
- 0 100 200 0 100 200
- 300 400 500 300 400 500
-
-### `<Operation> concatenate(const OtherDerived& other, Axis axis)`
-
-TODO
-
-### `<Operation> pad(const PaddingDimensions& padding)`
-
-Returns a view of the input tensor in which the input is padded with zeros.
-
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{0, 100, 200}, {300, 400, 500}});
- Eigen::array<pair<int, int>, 2> paddings;
- paddings[0] = make_pair(0, 1);
- paddings[1] = make_pair(2, 3);
- Eigen::Tensor<int, 2> b = a.pad(paddings);
- cout << "a" << endl << a << endl << "b" << endl << b << endl;
- =>
- a
- 0 100 200
- 300 400 500
- b
- 0 0 0 100 200 0 0 0
- 0 0 300 400 500 0 0 0
- 0 0 0 0 0 0 0 0
-
-
-### `<Operation> extract_patches(const PatchDims& patch_dims)`
-
-Returns a tensor of coefficient patches extracted from the input tensor, where
-each patch is of dimension specified by 'patch_dims'. The returned tensor has
-one greater dimension than the input tensor, which is used to index each patch.
-The patch index in the output tensor depends on the data layout of the input
-tensor: the patch index is the last dimension in ColMajor layout, and the first
-dimension in RowMajor layout.
-
-For example, given the following input tensor:
-
- Eigen::Tensor<float, 2, DataLayout> tensor(3,4);
- tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f},
- {4.0f, 5.0f, 6.0f, 7.0f},
- {8.0f, 9.0f, 10.0f, 11.0f}});
-
- cout << "tensor: " << endl << tensor << endl;
-=>
-tensor:
- 0 1 2 3
- 4 5 6 7
- 8 9 10 11
-
-Six 2x2 patches can be extracted and indexed using the following code:
-
- Eigen::Tensor<float, 3, DataLayout> patch;
- Eigen::array<ptrdiff_t, 2> patch_dims;
- patch_dims[0] = 2;
- patch_dims[1] = 2;
- patch = tensor.extract_patches(patch_dims);
- for (int k = 0; k < 6; ++k) {
- cout << "patch index: " << k << endl;
- for (int i = 0; i < 2; ++i) {
- for (int j = 0; j < 2; ++j) {
- if (DataLayout == ColMajor) {
- cout << patch(i, j, k) << " ";
- } else {
- cout << patch(k, i, j) << " ";
- }
- }
- cout << endl;
- }
- }
-
-This code results in the following output when the data layout is ColMajor:
-
-patch index: 0
-0 1
-4 5
-patch index: 1
-4 5
-8 9
-patch index: 2
-1 2
-5 6
-patch index: 3
-5 6
-9 10
-patch index: 4
-2 3
-6 7
-patch index: 5
-6 7
-10 11
-
-This code results in the following output when the data layout is RowMajor:
-(NOTE: the set of patches is the same as in ColMajor, but are indexed differently).
-
-patch index: 0
-0 1
-4 5
-patch index: 1
-1 2
-5 6
-patch index: 2
-2 3
-6 7
-patch index: 3
-4 5
-8 9
-patch index: 4
-5 6
-9 10
-patch index: 5
-6 7
-10 11
-
-### `<Operation> extract_image_patches(const Index patch_rows, const Index patch_cols, const Index row_stride, const Index col_stride, const PaddingType padding_type)`
-
-Returns a tensor of coefficient image patches extracted from the input tensor,
-which is expected to have dimensions ordered as follows (depending on the data
-layout of the input tensor, and the number of additional dimensions 'N'):
-
-*) ColMajor
-1st dimension: channels (of size d)
-2nd dimension: rows (of size r)
-3rd dimension: columns (of size c)
-4th-Nth dimension: time (for video) or batch (for bulk processing).
-
-*) RowMajor (reverse order of ColMajor)
-1st-Nth dimension: time (for video) or batch (for bulk processing).
-N+1'th dimension: columns (of size c)
-N+2'th dimension: rows (of size r)
-N+3'th dimension: channels (of size d)
-
-The returned tensor has one greater dimension than the input tensor, which is
-used to index each patch. The patch index in the output tensor depends on the
-data layout of the input tensor: the patch index is the 4'th dimension in
-ColMajor layout, and the 4'th from the last dimension in RowMajor layout.
-
-For example, given the following input tensor with the following dimension
-sizes:
- *) depth: 2
- *) rows: 3
- *) columns: 5
- *) batch: 7
-
- Tensor<float, 4> tensor(2,3,5,7);
- Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
-
-2x2 image patches can be extracted and indexed using the following code:
-
-*) 2D patch: ColMajor (patch indexed by second-to-last dimension)
- Tensor<float, 5> twod_patch;
- twod_patch = tensor.extract_image_patches<2, 2>();
- // twod_patch.dimension(0) == 2
- // twod_patch.dimension(1) == 2
- // twod_patch.dimension(2) == 2
- // twod_patch.dimension(3) == 3*5
- // twod_patch.dimension(4) == 7
-
-*) 2D patch: RowMajor (patch indexed by the second dimension)
- Tensor<float, 5, RowMajor> twod_patch_row_major;
- twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>();
- // twod_patch_row_major.dimension(0) == 7
- // twod_patch_row_major.dimension(1) == 3*5
- // twod_patch_row_major.dimension(2) == 2
- // twod_patch_row_major.dimension(3) == 2
- // twod_patch_row_major.dimension(4) == 2
-
-## Special Operations
-
-### `<Operation> cast<T>()`
-
-Returns a tensor of type T with the same dimensions as the original tensor.
-The returned tensor contains the values of the original tensor converted to
-type T.
-
- Eigen::Tensor<float, 2> a(2, 3);
- Eigen::Tensor<int, 2> b = a.cast<int>();
-
-This can be useful for example if you need to do element-wise division of
-Tensors of integers. This is not currently supported by the Tensor library
-but you can easily cast the tensors to floats to do the division:
-
- Eigen::Tensor<int, 2> a(2, 3);
- a.setValues({{0, 1, 2}, {3, 4, 5}});
- Eigen::Tensor<int, 2> b =
- (a.cast<float>() / a.constant(2).cast<float>()).cast<int>();
- cout << "a" << endl << a << endl << endl;
- cout << "b" << endl << b << endl << endl;
- =>
- a
- 0 1 2
- 3 4 5
-
- b
- 0 0 1
- 1 2 2
-
-
-### `<Operation> eval()`
-
-TODO
-
-
-## Representation of scalar values
-
-Scalar values are often represented by tensors of size 1 and rank 0. For example
-Tensor<T, N>::maximum() currently returns a Tensor<T, 0>. Similarly, the inner
-product of 2 1d tensors (through contractions) returns a 0d tensor.
-
-## Limitations
-
-* The number of tensor dimensions is currently limited to 250 when using a
- compiler that supports cxx11. It is limited to only 5 for older compilers.
-* The IndexList class requires a cxx11 compliant compiler. You can use an
- array of indices instead if you don't have access to a modern compiler.
-* On GPUs only floating point values are properly tested and optimized for.
-* Complex and integer values are known to be broken on GPUs. If you try to use
- them you'll most likely end up triggering a static assertion failure such as
- EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-
-
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
deleted file mode 100644
index 00295a2..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h
+++ /dev/null
@@ -1,527 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_H
-
-namespace Eigen {
-
-/** \class Tensor
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The tensor class.
- *
- * The %Tensor class is the work-horse for all \em dense tensors within Eigen.
- *
- * The %Tensor class encompasses only dynamic-size objects so far.
- *
- * The first two template parameters are required:
- * \tparam Scalar_ Numeric type, e.g. float, double, int or `std::complex<float>`.
- * User defined scalar types are supported as well (see \ref user_defined_scalars "here").
- * \tparam NumIndices_ Number of indices (i.e. rank of the tensor)
- *
- * The remaining template parameters are optional -- in most cases you don't have to worry about them.
- * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either
- * \b #AutoAlign or \b #DontAlign.
- * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
- * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization.
- * Support for such operations (i.e. adding two tensors etc.) is planned.
- *
- * You can access elements of tensors using normal subscripting:
- *
- * \code
- * Eigen::Tensor<double, 4> t(10, 10, 10, 10);
- * t(0, 1, 2, 3) = 42.0;
- * \endcode
- *
- * This class can be extended with the help of the plugin mechanism described on the page
- * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
- *
- * <i><b>Some notes:</b></i>
- *
- * <dl>
- * <dt><b>Relation to other parts of Eigen:</b></dt>
- * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
- * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
- * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
- * class does not provide any of these features and is only available as a stand-alone class that just allows for
- * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to
- * change dramatically.</dd>
- * </dl>
- *
- * \ref TopicStorageOrders
- */
-
-template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
-class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
- public:
- typedef Tensor<Scalar_, NumIndices_, Options_, IndexType_> Self;
- typedef TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > Base;
- typedef typename Eigen::internal::nested<Self>::type Nested;
- typedef typename internal::traits<Self>::StorageKind StorageKind;
- typedef typename internal::traits<Self>::Index Index;
- typedef Scalar_ Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename Base::CoeffReturnType CoeffReturnType;
-
- enum {
- IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign),
- Layout = Options_ & RowMajor ? RowMajor : ColMajor,
- CoordAccess = true,
- RawAccess = true
- };
-
- static const int Options = Options_;
- static const int NumIndices = NumIndices_;
- typedef DSizes<Index, NumIndices_> Dimensions;
-
- protected:
- TensorStorage<Scalar, Dimensions, Options> m_storage;
-
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomIndices>
- struct isOfNormalIndex{
- static const bool is_array = internal::is_base_of<array<Index, NumIndices>, CustomIndices>::value;
- static const bool is_int = NumTraits<CustomIndices>::IsInteger;
- static const bool value = is_array | is_int;
- };
-#endif
-
- public:
- // Metadata
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); }
-
- // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
- // work, because that uses base().coeffRef() - and we don't yet
- // implement a similar class hierarchy
- inline Self& base() { return *this; }
- inline const Self& base() const { return *this; }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeff(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- }
-#endif
-
- // normal indices
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
- {
- eigen_internal_assert(checkIndexRange(indices));
- return m_storage.data()[linearizedIndex(indices)];
- }
-
- // custom indices
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomIndices,
- EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
- >
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const
- {
- return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return m_storage.data()[0];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_storage.data()[index];
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeffRef(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- }
-#endif
-
- // normal indices
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
- {
- eigen_internal_assert(checkIndexRange(indices));
- return m_storage.data()[linearizedIndex(indices)];
- }
-
- // custom indices
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomIndices,
- EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
- >
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices)
- {
- return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return m_storage.data()[0];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_storage.data()[index];
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return this->operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
- {
- return coeff(array<Index, 2>(i0, i1));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
- {
- return coeff(array<Index, 3>(i0, i1, i2));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
- {
- return coeff(array<Index, 4>(i0, i1, i2, i3));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
- {
- return coeff(array<Index, 5>(i0, i1, i2, i3, i4));
- }
-#endif
-
- // custom indices
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomIndices,
- EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
- >
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const
- {
- return coeff(internal::customIndices2Array<Index,NumIndices>(indices));
- }
-#endif
-
- // normal indices
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
- {
- return coeff(indices);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
- {
- eigen_internal_assert(index >= 0 && index < size());
- return coeff(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeff();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const
- {
- // The bracket operator is only for vectors, use the parenthesis operator instead.
- EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeff(index);
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return operator()(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
- {
- return coeffRef(array<Index, 2>(i0, i1));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
- {
- return coeffRef(array<Index, 3>(i0, i1, i2));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
- {
- return coeffRef(array<Index, 4>(i0, i1, i2, i3));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
- {
- return coeffRef(array<Index, 5>(i0, i1, i2, i3, i4));
- }
-#endif
-
- // normal indices
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
- {
- return coeffRef(indices);
- }
-
- // custom indices
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomIndices,
- EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomIndices>::value) )
- >
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices)
- {
- return coeffRef(internal::customIndices2Array<Index,NumIndices>(indices));
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index)
- {
- eigen_assert(index >= 0 && index < size());
- return coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeffRef();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index)
- {
- // The bracket operator is only for vectors, use the parenthesis operator instead
- EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor()
- : m_storage()
- {
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor(const Self& other)
- : m_storage(other.m_storage)
- {
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions)
- : m_storage(firstDimension, otherDimensions...)
- {
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-#else
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1)
- : m_storage(dim1, array<Index, 1>(dim1))
- {
- EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2)
- : m_storage(dim1*dim2, array<Index, 2>(dim1, dim2))
- {
- EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3)
- : m_storage(dim1*dim2*dim3, array<Index, 3>(dim1, dim2, dim3))
- {
- EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4)
- : m_storage(dim1*dim2*dim3*dim4, array<Index, 4>(dim1, dim2, dim3, dim4))
- {
- EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5)
- : m_storage(dim1*dim2*dim3*dim4*dim5, array<Index, 5>(dim1, dim2, dim3, dim4, dim5))
- {
- EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-#endif
-
- /** Normal Dimension */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array<Index, NumIndices>& dimensions)
- : m_storage(internal::array_prod(dimensions), dimensions)
- {
- EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
- {
- typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
- Assign assign(*this, other.derived());
- resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- }
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor(const TensorBase<OtherDerived, WriteAccessors>& other)
- {
- typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
- Assign assign(*this, other.derived());
- resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other)
- {
- typedef TensorAssignOp<Tensor, const Tensor> Assign;
- Assign assign(*this, other);
- resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other)
- {
- typedef TensorAssignOp<Tensor, const OtherDerived> Assign;
- Assign assign(*this, other);
- resize(TensorEvaluator<const Assign, DefaultDevice>(assign, DefaultDevice()).dimensions());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- void resize(Index firstDimension, IndexTypes... otherDimensions)
- {
- // The number of dimensions used to resize a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- resize(array<Index, NumIndices>{{firstDimension, otherDimensions...}});
- }
-#endif
-
- /** Normal Dimension */
- EIGEN_DEVICE_FUNC void resize(const array<Index, NumIndices>& dimensions)
- {
- int i;
- Index size = Index(1);
- for (i = 0; i < NumIndices; i++) {
- internal::check_rows_cols_for_overflow<Dynamic>::run(size, dimensions[i]);
- size *= dimensions[i];
- }
- #ifdef EIGEN_INITIALIZE_COEFFS
- bool size_changed = size != this->size();
- m_storage.resize(size, dimensions);
- if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
- #else
- m_storage.resize(size, dimensions);
- #endif
- }
-
- // Why this overload, DSizes is derived from array ??? //
- EIGEN_DEVICE_FUNC void resize(const DSizes<Index, NumIndices>& dimensions) {
- array<Index, NumIndices> dims;
- for (int i = 0; i < NumIndices; ++i) {
- dims[i] = dimensions[i];
- }
- resize(dims);
- }
-
- EIGEN_DEVICE_FUNC
- void resize()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- // Nothing to do: rank 0 tensors have fixed size
- }
-
- /** Custom Dimension */
-#ifdef EIGEN_HAS_SFINAE
- template<typename CustomDimension,
- EIGEN_SFINAE_ENABLE_IF( !(isOfNormalIndex<CustomDimension>::value) )
- >
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions)
- {
- resize(internal::customIndices2Array<Index,NumIndices>(dimensions));
- }
-#endif
-
-#ifndef EIGEN_EMULATE_CXX11_META_H
- template <typename std::ptrdiff_t... Indices>
- EIGEN_DEVICE_FUNC
- void resize(const Sizes<Indices...>& dimensions) {
- array<Index, NumIndices> dims;
- for (int i = 0; i < NumIndices; ++i) {
- dims[i] = static_cast<Index>(dimensions[i]);
- }
- resize(dims);
- }
-#else
- template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
- EIGEN_DEVICE_FUNC
- void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
- array<Index, NumIndices> dims;
- for (int i = 0; i < NumIndices; ++i) {
- dims[i] = static_cast<Index>(dimensions[i]);
- }
- resize(dims);
- }
-#endif
-
- protected:
-
- bool checkIndexRange(const array<Index, NumIndices>& indices) const
- {
- using internal::array_apply_and_reduce;
- using internal::array_zip_and_reduce;
- using internal::greater_equal_zero_op;
- using internal::logical_and_op;
- using internal::lesser_op;
-
- return
- // check whether the indices are all >= 0
- array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) &&
- // check whether the indices fit in the dimensions
- array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const
- {
- if (Options&RowMajor) {
- return m_storage.dimensions().IndexOfRowMajor(indices);
- } else {
- return m_storage.dimensions().IndexOfColMajor(indices);
- }
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
deleted file mode 100644
index d06f40c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h
+++ /dev/null
@@ -1,299 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
-#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
-
-namespace Eigen {
-namespace internal {
-
-/** \class TensorIndexTuple
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor + Index Tuple class.
- *
- *
- */
-template<typename XprType>
-struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
-{
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef Tuple<Index, typename XprTraits::Scalar> Scalar;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename XprType>
-struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
-{
- typedef const TensorIndexTupleOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorIndexTupleOp<XprType>, 1,
- typename eval<TensorIndexTupleOp<XprType> >::type>
-{
- typedef TensorIndexTupleOp<XprType> type;
-};
-
-} // end namespace internal
-
-template<typename XprType>
-class TensorIndexTupleOp : public TensorBase<TensorIndexTupleOp<XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename Eigen::internal::nested<TensorIndexTupleOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorIndexTupleOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorIndexTupleOp>::Index Index;
- typedef Tuple<Index, typename XprType::CoeffReturnType> CoeffReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr)
- : m_xpr(expr) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
-};
-
-// Eval as rvalue
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
-{
- typedef TensorIndexTupleOp<ArgType> XprType;
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
-
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
- static const int NumDims = internal::array_size<Dimensions>::value;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
- PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device) { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
- return m_impl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return CoeffReturnType(index, m_impl.coeff(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-namespace internal {
-
-/** \class TensorTupleIndex
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Converts to Tensor<Tuple<Index, Scalar> > and reduces to Tensor<Index>.
- *
- */
-template<typename ReduceOp, typename Dims, typename XprType>
-struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
-{
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef Index Scalar;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename ReduceOp, typename Dims, typename XprType>
-struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
-{
- typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type;
-};
-
-template<typename ReduceOp, typename Dims, typename XprType>
-struct nested<TensorTupleReducerOp<ReduceOp, Dims, XprType>, 1,
- typename eval<TensorTupleReducerOp<ReduceOp, Dims, XprType> >::type>
-{
- typedef TensorTupleReducerOp<ReduceOp, Dims, XprType> type;
-};
-
-} // end namespace internal
-
-template<typename ReduceOp, typename Dims, typename XprType>
-class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Dims, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename Eigen::internal::nested<TensorTupleReducerOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorTupleReducerOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorTupleReducerOp>::Index Index;
- typedef Index CoeffReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
- const ReduceOp& reduce_op,
- const int return_dim,
- const Dims& reduce_dims)
- : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- const ReduceOp& reduce_op() const { return m_reduce_op; }
-
- EIGEN_DEVICE_FUNC
- const Dims& reduce_dims() const { return m_reduce_dims; }
-
- EIGEN_DEVICE_FUNC
- int return_dim() const { return m_return_dim; }
-
- protected:
- typename XprType::Nested m_xpr;
- const ReduceOp m_reduce_op;
- const int m_return_dim;
- const Dims m_reduce_dims;
-};
-
-// Eval as rvalue
-template<typename ReduceOp, typename Dims, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
-{
- typedef TensorTupleReducerOp<ReduceOp, Dims, ArgType> XprType;
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename TensorIndexTupleOp<ArgType>::CoeffReturnType TupleType;
- typedef typename TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Dimensions Dimensions;
- typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
- static const int NumDims = internal::array_size<InputDimensions>::value;
- typedef array<Index, NumDims> StrideDims;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
- PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
- BlockAccess = false,
- Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_orig_impl(op.expression(), device),
- m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
- m_return_dim(op.return_dim()) {
-
- gen_strides(m_orig_impl.dimensions(), m_strides);
- if (Layout == static_cast<int>(ColMajor)) {
- const Index total_size = internal::array_prod(m_orig_impl.dimensions());
- m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size;
- } else {
- const Index total_size = internal::array_prod(m_orig_impl.dimensions());
- m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size;
- }
- m_stride_div = m_strides[m_return_dim];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
- return m_impl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- const TupleType v = m_impl.coeff(index);
- return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double compute_cost = 1.0 +
- (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost<Index>() + TensorOpCost::DivCost<Index>()));
- return m_orig_impl.costPerCoeff(vectorized) +
- m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost);
- }
-
- private:
- EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) {
- if (m_return_dim < 0) {
- return; // Won't be using the strides.
- }
- eigen_assert(m_return_dim < NumDims &&
- "Asking to convert index to a dimension outside of the rank");
-
- // Calculate m_stride_div and m_stride_mod, which are used to
- // calculate the value of an index w.r.t. the m_return_dim.
- if (Layout == static_cast<int>(ColMajor)) {
- strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- strides[i] = strides[i-1] * dims[i-1];
- }
- } else {
- strides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- strides[i] = strides[i+1] * dims[i+1];
- }
- }
- }
-
- protected:
- TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device> m_orig_impl;
- TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device> m_impl;
- const int m_return_dim;
- StrideDims m_strides;
- Index m_stride_mod;
- Index m_stride_div;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
deleted file mode 100644
index 166be20..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
-#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
-
-namespace Eigen {
-
-/** \class TensorAssignOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The tensor assignment class.
- *
- * This class represents the assignment of the values resulting from the evaluation of
- * the rhs expression to the memory locations denoted by the lhs expression.
- */
-namespace internal {
-template<typename LhsXprType, typename RhsXprType>
-struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
-{
- typedef typename LhsXprType::Scalar Scalar;
- typedef typename traits<LhsXprType>::StorageKind StorageKind;
- typedef typename promote_index_type<typename traits<LhsXprType>::Index,
- typename traits<RhsXprType>::Index>::type Index;
- typedef typename LhsXprType::Nested LhsNested;
- typedef typename RhsXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
- static const int Layout = internal::traits<LhsXprType>::Layout;
-
- enum {
- Flags = 0
- };
-};
-
-template<typename LhsXprType, typename RhsXprType>
-struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense>
-{
- typedef const TensorAssignOp<LhsXprType, RhsXprType>& type;
-};
-
-template<typename LhsXprType, typename RhsXprType>
-struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type>
-{
- typedef TensorAssignOp<LhsXprType, RhsXprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename LhsXprType, typename RhsXprType>
-class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> >
-{
- public:
- typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename LhsXprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorAssignOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs)
- : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {}
-
- /** \returns the nested expressions */
- EIGEN_DEVICE_FUNC
- typename internal::remove_all<typename LhsXprType::Nested>::type&
- lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename RhsXprType::Nested>::type&
- rhsExpression() const { return m_rhs_xpr; }
-
- protected:
- typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr;
- const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr;
-};
-
-
-template<typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
-{
- typedef TensorAssignOp<LeftArgType, RightArgType> XprType;
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
- m_leftImpl(op.lhsExpression(), device),
- m_rightImpl(op.rhsExpression(), device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
- }
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
- {
- // The dimensions of the lhs and the rhs tensors should be equal to prevent
- // overflows and ensure the result is fully initialized.
- // TODO: use left impl instead if right impl dimensions are known at compile time.
- return m_rightImpl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
- m_leftImpl.evalSubExprsIfNeeded(NULL);
- // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
- // null value), attempt to evaluate the rhs expression in place. Returns true iff in place
- // evaluation isn't supported and the caller still needs to manually assign the values generated
- // by the rhs to the lhs.
- return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_leftImpl.cleanup();
- m_rightImpl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
- m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
- const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
- const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
- m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
- }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_leftImpl.coeff(index);
- }
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
- {
- return m_leftImpl.template packet<LoadMode>(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- // We assume that evalPacket or evalScalar is called to perform the
- // assignment and account for the cost of the write here, but reduce left
- // cost by one load because we are using m_leftImpl.coeffRef.
- TensorOpCost left = m_leftImpl.costPerCoeff(vectorized);
- return m_rightImpl.costPerCoeff(vectorized) +
- TensorOpCost(
- numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)),
- left.bytes_stored(), left.compute_cycles()) +
- TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
- }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); }
-
- private:
- TensorEvaluator<LeftArgType, Device> m_leftImpl;
- TensorEvaluator<RightArgType, Device> m_rightImpl;
-};
-
-}
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
deleted file mode 100644
index f573608..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ /dev/null
@@ -1,1012 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H
-
-// clang-format off
-
-namespace Eigen {
-
-/** \class TensorBase
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The tensor base class.
- *
- * This class is the common parent of the Tensor and TensorMap class, thus
- * making it possible to use either class interchangeably in expressions.
- */
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-// FIXME: Doxygen does not like the inheritance with different template parameters.
-// Since there is no Doxygen documentation inside, we disable it for now.
-template<typename Derived>
-class TensorBase<Derived, ReadOnlyAccessors>
-{
- public:
- typedef internal::traits<Derived> DerivedTraits;
- typedef typename DerivedTraits::Scalar Scalar;
- typedef typename DerivedTraits::Index Index;
- typedef typename internal::remove_const<Scalar>::type CoeffReturnType;
- static const int NumDimensions = DerivedTraits::NumDimensions;
-
- // Generic nullary operation support.
- template <typename CustomNullaryOp> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<CustomNullaryOp, const Derived>
- nullaryExpr(const CustomNullaryOp& func) const {
- return TensorCwiseNullaryOp<CustomNullaryOp, const Derived>(derived(), func);
- }
-
- // Coefficient-wise nullary operators
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived>
- constant(const Scalar& value) const {
- return nullaryExpr(internal::scalar_constant_op<Scalar>(value));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::UniformRandomGenerator<Scalar>, const Derived>
- random() const {
- return nullaryExpr(internal::UniformRandomGenerator<Scalar>());
- }
- template <typename RandomGenerator> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<RandomGenerator, const Derived>
- random(const RandomGenerator& gen = RandomGenerator()) const {
- return nullaryExpr(gen);
- }
-
- // Tensor generation
- template <typename Generator> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorGeneratorOp<Generator, const Derived>
- generate(const Generator& generator) const {
- return TensorGeneratorOp<Generator, const Derived>(derived(), generator);
- }
-
- // Generic unary operation support.
- template <typename CustomUnaryOp> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<CustomUnaryOp, const Derived>
- unaryExpr(const CustomUnaryOp& func) const {
- return TensorCwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
- }
-
- // Coefficient-wise unary operators
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived>
- operator-() const {
- return unaryExpr(internal::scalar_opposite_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
- sqrt() const {
- return unaryExpr(internal::scalar_sqrt_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived>
- sign() const {
- return unaryExpr(internal::scalar_sign_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived>
- rsqrt() const {
- return unaryExpr(internal::scalar_rsqrt_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived>
- square() const {
- return unaryExpr(internal::scalar_square_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived>
- cube() const {
- return unaryExpr(internal::scalar_cube_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
- inverse() const {
- return unaryExpr(internal::scalar_inverse_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived>
- tanh() const {
- return unaryExpr(internal::scalar_tanh_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived>
- lgamma() const {
- return unaryExpr(internal::scalar_lgamma_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived>
- digamma() const {
- return unaryExpr(internal::scalar_digamma_op<Scalar>());
- }
-
- // igamma(a = this, x = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_igamma_op<Scalar>, const Derived, const OtherDerived>
- igamma(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>());
- }
-
- // igammac(a = this, x = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived>
- igammac(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_igammac_op<Scalar>());
- }
-
- // zeta(x = this, q = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const OtherDerived>
- zeta(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_zeta_op<Scalar>());
- }
-
- // polygamma(n = this, x = other)
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const Derived, const OtherDerived>
- polygamma(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_polygamma_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
- erf() const {
- return unaryExpr(internal::scalar_erf_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived>
- erfc() const {
- return unaryExpr(internal::scalar_erfc_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived>
- sigmoid() const {
- return unaryExpr(internal::scalar_sigmoid_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
- exp() const {
- return unaryExpr(internal::scalar_exp_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived>
- log() const {
- return unaryExpr(internal::scalar_log_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived>
- log1p() const {
- return unaryExpr(internal::scalar_log1p_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
- abs() const {
- return unaryExpr(internal::scalar_abs_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>
- conjugate() const {
- return unaryExpr(internal::scalar_conjugate_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived>
- pow(Scalar exponent) const {
- return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>
- real() const {
- return unaryExpr(internal::scalar_real_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived>
- imag() const {
- return unaryExpr(internal::scalar_imag_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >, const Derived>
- operator+ (Scalar rhs) const {
- return unaryExpr(internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >(rhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_sum_op<Scalar> >, const Derived>
- operator+ (Scalar lhs, const Derived& rhs) {
- return rhs.unaryExpr(internal::bind1st_op<internal::scalar_sum_op<Scalar> >(lhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >, const Derived>
- operator- (Scalar rhs) const {
- EIGEN_STATIC_ASSERT((NumTraits<Scalar>::IsSigned || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return unaryExpr(internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >(rhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_difference_op<Scalar> >, const Derived>
- operator- (Scalar lhs, const Derived& rhs) {
- return rhs.unaryExpr(internal::bind1st_op<internal::scalar_difference_op<Scalar> >(lhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >, const Derived>
- operator* (Scalar rhs) const {
- return unaryExpr(internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >(rhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_product_op<Scalar> >, const Derived>
- operator* (Scalar lhs, const Derived& rhs) {
- return rhs.unaryExpr(internal::bind1st_op<internal::scalar_product_op<Scalar> >(lhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >, const Derived>
- operator/ (Scalar rhs) const {
- return unaryExpr(internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >(rhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_quotient_op<Scalar> >, const Derived>
- operator/ (Scalar lhs, const Derived& rhs) {
- return rhs.unaryExpr(internal::bind1st_op<internal::scalar_quotient_op<Scalar> >(lhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_mod_op<Scalar>, const Derived>
- operator% (Scalar rhs) const {
- EIGEN_STATIC_ASSERT(NumTraits<Scalar>::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD);
- return unaryExpr(internal::scalar_mod_op<Scalar>(rhs));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- cwiseMax(Scalar threshold) const {
- return cwiseMax(constant(threshold));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- cwiseMin(Scalar threshold) const {
- return cwiseMin(constant(threshold));
- }
-
- template <typename NewType> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived>
- cast() const {
- return TensorConversionOp<NewType, const Derived>(derived());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived>
- round() const {
- return unaryExpr(internal::scalar_round_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived>
- ceil() const {
- return unaryExpr(internal::scalar_ceil_op<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived>
- floor() const {
- return unaryExpr(internal::scalar_floor_op<Scalar>());
- }
-
- // Generic binary operation support.
- template <typename CustomBinaryOp, typename OtherDerived> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
- binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const {
- return TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other, func);
- }
-
- // Coefficient-wise binary operators.
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived>
- operator+(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_sum_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>
- operator-(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_difference_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_product_op<Scalar>, const Derived, const OtherDerived>
- operator*(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_product_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
- operator/(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_quotient_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived>
- cwiseMax(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_max_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived>
- cwiseMin(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_min_op<Scalar>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
- operator&&(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_boolean_and_op());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
- operator||(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_boolean_or_op());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
- operator^(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_boolean_xor_op());
- }
-
- // Comparisons and tests.
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
- operator<(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>());
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
- operator<=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>());
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
- operator>(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>());
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
- operator>=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
- operator==(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>());
- }
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
- operator!=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>());
- }
-
- // comparisons and tests for Scalars
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator<(Scalar threshold) const {
- return operator<(constant(threshold));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator<=(Scalar threshold) const {
- return operator<=(constant(threshold));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator>(Scalar threshold) const {
- return operator>(constant(threshold));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator>=(Scalar threshold) const {
- return operator>=(constant(threshold));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator==(Scalar threshold) const {
- return operator==(constant(threshold));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
- operator!=(Scalar threshold) const {
- return operator!=(constant(threshold));
- }
-
- // Checks
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived>
- (isnan)() const {
- return unaryExpr(internal::scalar_isnan_op<Scalar>());
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived>
- (isinf)() const {
- return unaryExpr(internal::scalar_isinf_op<Scalar>());
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived>
- (isfinite)() const {
- return unaryExpr(internal::scalar_isfinite_op<Scalar>());
- }
-
- // Coefficient-wise ternary operators.
- template<typename ThenDerived, typename ElseDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>
- select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const {
- return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived());
- }
-
- // Contractions.
- typedef Eigen::IndexPair<Index> DimensionPair;
-
- template<typename OtherDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorContractionOp<const Dimensions, const Derived, const OtherDerived>
- contract(const OtherDerived& other, const Dimensions& dims) const {
- return TensorContractionOp<const Dimensions, const Derived, const OtherDerived>(derived(), other.derived(), dims);
- }
-
- // Convolutions.
- template<typename KernelDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>
- convolve(const KernelDerived& kernel, const Dimensions& dims) const {
- return TensorConvolutionOp<const Dimensions, const Derived, const KernelDerived>(derived(), kernel.derived(), dims);
- }
-
- // Fourier transforms
- template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
- fft(const FFT& fft) const {
- return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
- }
-
- // Scan.
- typedef TensorScanOp<internal::SumReducer<CoeffReturnType>, const Derived> TensorScanSumOp;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorScanSumOp
- cumsum(const Index& axis, bool exclusive = false) const {
- return TensorScanSumOp(derived(), axis, exclusive);
- }
-
- typedef TensorScanOp<internal::ProdReducer<CoeffReturnType>, const Derived> TensorScanProdOp;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorScanProdOp
- cumprod(const Index& axis, bool exclusive = false) const {
- return TensorScanProdOp(derived(), axis, exclusive);
- }
-
- template <typename Reducer>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorScanOp<Reducer, const Derived>
- scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const {
- return TensorScanOp<Reducer, const Derived>(derived(), axis, exclusive, reducer);
- }
-
- // Reductions.
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>
- sum(const Dims& dims) const {
- return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>());
- }
-
- const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
- sum() const {
- DimensionList<Index, NumDimensions> in_dims;
- return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>
- mean(const Dims& dims) const {
- return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>());
- }
-
- const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
- mean() const {
- DimensionList<Index, NumDimensions> in_dims;
- return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>
- prod(const Dims& dims) const {
- return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>());
- }
-
- const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
- prod() const {
- DimensionList<Index, NumDimensions> in_dims;
- return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>
- maximum(const Dims& dims) const {
- return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>());
- }
-
- const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
- maximum() const {
- DimensionList<Index, NumDimensions> in_dims;
- return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>
- minimum(const Dims& dims) const {
- return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>());
- }
-
- const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
- minimum() const {
- DimensionList<Index, NumDimensions> in_dims;
- return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::AndReducer, const Dims, const TensorConversionOp<bool, const Derived> >
- all(const Dims& dims) const {
- return cast<bool>().reduce(dims, internal::AndReducer());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::AndReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> >
- all() const {
- DimensionList<Index, NumDimensions> in_dims;
- return cast<bool>().reduce(in_dims, internal::AndReducer());
- }
-
- template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::OrReducer, const Dims, const TensorConversionOp<bool, const Derived> >
- any(const Dims& dims) const {
- return cast<bool>().reduce(dims, internal::OrReducer());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<internal::OrReducer, const DimensionList<Index, NumDimensions>, const TensorConversionOp<bool, const Derived> >
- any() const {
- DimensionList<Index, NumDimensions> in_dims;
- return cast<bool>().reduce(in_dims, internal::OrReducer());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorTupleReducerOp<
- internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, NumDimensions>, const Derived>
- argmax() const {
- array<Index, NumDimensions> in_dims;
- for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d;
- return TensorTupleReducerOp<
- internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, NumDimensions>,
- const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorTupleReducerOp<
- internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, NumDimensions>, const Derived>
- argmin() const {
- array<Index, NumDimensions> in_dims;
- for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d;
- return TensorTupleReducerOp<
- internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, NumDimensions>,
- const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), -1, in_dims);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorTupleReducerOp<
- internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, 1>, const Derived>
- argmax(const int return_dim) const {
- array<Index, 1> in_dims;
- in_dims[0] = return_dim;
- return TensorTupleReducerOp<
- internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, 1>,
- const Derived>(derived(), internal::ArgMaxTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorTupleReducerOp<
- internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, 1>, const Derived>
- argmin(const int return_dim) const {
- array<Index, 1> in_dims;
- in_dims[0] = return_dim;
- return TensorTupleReducerOp<
- internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >,
- const array<Index, 1>,
- const Derived>(derived(), internal::ArgMinTupleReducer<Tuple<Index, CoeffReturnType> >(), return_dim, in_dims);
- }
-
- template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReductionOp<Reducer, const Dims, const Derived>
- reduce(const Dims& dims, const Reducer& reducer) const {
- return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer);
- }
-
- template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorBroadcastingOp<const Broadcast, const Derived>
- broadcast(const Broadcast& broadcast) const {
- return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
- }
-
- template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorConcatenationOp<Axis, const Derived, const OtherDerived>
- concatenate(const OtherDerived& other, Axis axis) const {
- return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis);
- }
-
- template <typename PatchDims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorPatchOp<const PatchDims, const Derived>
- extract_patches(const PatchDims& patch_dims) const {
- return TensorPatchOp<const PatchDims, const Derived>(derived(), patch_dims);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
- extract_image_patches(const Index patch_rows = 1, const Index patch_cols = 1,
- const Index row_stride = 1, const Index col_stride = 1,
- const Index in_row_stride = 1, const Index in_col_stride = 1,
- const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const {
- return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
- in_row_stride, in_col_stride, 1, 1, padding_type, padding_value);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorImagePatchOp<Dynamic, Dynamic, const Derived>
- extract_image_patches(const Index patch_rows, const Index patch_cols,
- const Index row_stride, const Index col_stride,
- const Index in_row_stride, const Index in_col_stride,
- const Index row_inflate_stride, const Index col_inflate_stride,
- const Index padding_top, const Index padding_bottom,
- const Index padding_left,const Index padding_right,
- const Scalar padding_value) const {
- return TensorImagePatchOp<Dynamic, Dynamic, const Derived>(derived(), patch_rows, patch_cols, row_stride, col_stride,
- in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride,
- padding_top, padding_bottom, padding_left, padding_right, padding_value);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
- extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
- const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1,
- const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const {
- return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value);
- }
-
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
- extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
- const Index plane_stride, const Index row_stride, const Index col_stride,
- const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride,
- const Index padding_top_z, const Index padding_bottom_z,
- const Index padding_top, const Index padding_bottom,
- const Index padding_left, const Index padding_right, const Scalar padding_value = Scalar(0)) const {
- return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value);
- }
-
- // Morphing operators.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorLayoutSwapOp<const Derived>
- swap_layout() const {
- return TensorLayoutSwapOp<const Derived>(derived());
- }
- template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReshapingOp<const NewDimensions, const Derived>
- reshape(const NewDimensions& newDimensions) const {
- return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
- }
- template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
- slice(const StartIndices& startIndices, const Sizes& sizes) const {
- return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
- }
- template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, const Derived>
- stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const {
- return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides,
- const Derived>(derived(), startIndices, stopIndices, strides);
- }
- template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorChippingOp<DimId, const Derived>
- chip(const Index offset) const {
- return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorChippingOp<Dynamic, const Derived>
- chip(const Index offset, const Index dim) const {
- return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
- }
- template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReverseOp<const ReverseDimensions, const Derived>
- reverse(const ReverseDimensions& rev) const {
- return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
- }
- template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorPaddingOp<const PaddingDimensions, const Derived>
- pad(const PaddingDimensions& padding) const {
- return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, internal::scalar_cast_op<int, Scalar>()(0));
- }
- template <typename PaddingDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorPaddingOp<const PaddingDimensions, const Derived>
- pad(const PaddingDimensions& padding, const Scalar padding_value) const {
- return TensorPaddingOp<const PaddingDimensions, const Derived>(derived(), padding, padding_value);
- }
- template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
- }
- template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorStridingOp<const Strides, const Derived>
- stride(const Strides& strides) const {
- return TensorStridingOp<const Strides, const Derived>(derived(), strides);
- }
- template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorInflationOp<const Strides, const Derived>
- inflate(const Strides& strides) const {
- return TensorInflationOp<const Strides, const Derived>(derived(), strides);
- }
-
- // Returns a tensor containing index/value tuples
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorIndexTupleOp<const Derived>
- index_tuples() const {
- return TensorIndexTupleOp<const Derived>(derived());
- }
-
- // Support for custom unary and binary operations
- template <typename CustomUnaryFunc>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const {
- return TensorCustomUnaryOp<const CustomUnaryFunc, const Derived>(derived(), op);
- }
- template <typename OtherDerived, typename CustomBinaryFunc>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const {
- return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op);
- }
-
- // Force the evaluation of the expression.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorForcedEvalOp<const Derived> eval() const {
- return TensorForcedEvalOp<const Derived>(derived());
- }
-
- protected:
- template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
- template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
- template <typename OtherDerived, int AccessLevel> friend class TensorBase;
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
-};
-
-template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value>
-class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
- public:
- typedef internal::traits<Derived> DerivedTraits;
- typedef typename DerivedTraits::Scalar Scalar;
- typedef typename DerivedTraits::Index Index;
- typedef Scalar CoeffReturnType;
- static const int NumDimensions = DerivedTraits::NumDimensions;
-
- template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
- template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
- template <typename OtherDerived, int OtherAccessLevel> friend class TensorBase;
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& setZero() {
- return setConstant(Scalar(0));
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) {
- return derived() = this->constant(val);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& setRandom() {
- return derived() = this->random();
- }
- template <typename RandomGenerator> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& setRandom() {
- return derived() = this->template random<RandomGenerator>();
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& setValues(
- const typename internal::Initializer<Derived, NumDimensions>::InitList& vals) {
- TensorEvaluator<Derived, DefaultDevice> eval(derived(), DefaultDevice());
- internal::initialize_tensor<Derived, NumDimensions>(eval, vals);
- return derived();
- }
-#endif // EIGEN_HAS_VARIADIC_TEMPLATES
-
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Derived& operator+=(const OtherDerived& other) {
- return derived() = derived() + other.derived();
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Derived& operator-=(const OtherDerived& other) {
- return derived() = derived() - other.derived();
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Derived& operator*=(const OtherDerived& other) {
- return derived() = derived() * other.derived();
- }
- template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Derived& operator/=(const OtherDerived& other) {
- return derived() = derived() / other.derived();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorLayoutSwapOp<const Derived>
- swap_layout() const {
- return TensorLayoutSwapOp<const Derived>(derived());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorLayoutSwapOp<Derived>
- swap_layout() {
- return TensorLayoutSwapOp<Derived>(derived());
- }
-
- template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorConcatenationOp<const Axis, const Derived, const OtherDerived>
- concatenate(const OtherDerived& other, const Axis& axis) const {
- return TensorConcatenationOp<const Axis, const Derived, const OtherDerived>(derived(), other, axis);
- }
- template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorConcatenationOp<const Axis, Derived, OtherDerived>
- concatenate(const OtherDerived& other, const Axis& axis) {
- return TensorConcatenationOp<const Axis, Derived, OtherDerived>(derived(), other, axis);
- }
-
- template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReshapingOp<const NewDimensions, const Derived>
- reshape(const NewDimensions& newDimensions) const {
- return TensorReshapingOp<const NewDimensions, const Derived>(derived(), newDimensions);
- }
- template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorReshapingOp<const NewDimensions, Derived>
- reshape(const NewDimensions& newDimensions) {
- return TensorReshapingOp<const NewDimensions, Derived>(derived(), newDimensions);
- }
-
- template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorSlicingOp<const StartIndices, const Sizes, const Derived>
- slice(const StartIndices& startIndices, const Sizes& sizes) const {
- return TensorSlicingOp<const StartIndices, const Sizes, const Derived>(derived(), startIndices, sizes);
- }
- template <typename StartIndices, typename Sizes> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorSlicingOp<const StartIndices, const Sizes, Derived>
- slice(const StartIndices& startIndices, const Sizes& sizes) {
- return TensorSlicingOp<const StartIndices, const Sizes, Derived>(derived(), startIndices, sizes);
- }
-
- template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, const Derived>
- stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const {
- return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides,
- const Derived>(derived(), startIndices, stopIndices, strides);
- }
- template <typename StartIndices, typename StopIndices, typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides, Derived>
- stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) {
- return TensorStridingSlicingOp<const StartIndices, const StopIndices, const Strides,
- Derived>(derived(), startIndices, stopIndices, strides);
- }
-
- template <DenseIndex DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorChippingOp<DimId, const Derived>
- chip(const Index offset) const {
- return TensorChippingOp<DimId, const Derived>(derived(), offset, DimId);
- }
- template <Index DimId> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorChippingOp<DimId, Derived>
- chip(const Index offset) {
- return TensorChippingOp<DimId, Derived>(derived(), offset, DimId);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorChippingOp<Dynamic, const Derived>
- chip(const Index offset, const Index dim) const {
- return TensorChippingOp<Dynamic, const Derived>(derived(), offset, dim);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorChippingOp<Dynamic, Derived>
- chip(const Index offset, const Index dim) {
- return TensorChippingOp<Dynamic, Derived>(derived(), offset, dim);
- }
-
- template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorReverseOp<const ReverseDimensions, const Derived>
- reverse(const ReverseDimensions& rev) const {
- return TensorReverseOp<const ReverseDimensions, const Derived>(derived(), rev);
- }
- template <typename ReverseDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorReverseOp<const ReverseDimensions, Derived>
- reverse(const ReverseDimensions& rev) {
- return TensorReverseOp<const ReverseDimensions, Derived>(derived(), rev);
- }
-
- template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
- }
- template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorShufflingOp<const Shuffle, Derived>
- shuffle(const Shuffle& shuffle) {
- return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
- }
-
- template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorStridingOp<const Strides, const Derived>
- stride(const Strides& strides) const {
- return TensorStridingOp<const Strides, const Derived>(derived(), strides);
- }
- template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorStridingOp<const Strides, Derived>
- stride(const Strides& strides) {
- return TensorStridingOp<const Strides, Derived>(derived(), strides);
- }
-
- // Select the device on which to evaluate the expression.
- template <typename DeviceType>
- TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
- return TensorDevice<Derived, DeviceType>(device, derived());
- }
-
- protected:
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Derived& derived() { return *static_cast<Derived*>(this); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
-};
-#endif // EIGEN_PARSED_BY_DOXYGEN
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
deleted file mode 100644
index 4cfe300..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ /dev/null
@@ -1,392 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
-
-namespace Eigen {
-
-/** \class TensorBroadcasting
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor broadcasting class.
- *
- *
- */
-namespace internal {
-template<typename Broadcast, typename XprType>
-struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename Broadcast, typename XprType>
-struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
-{
- typedef const TensorBroadcastingOp<Broadcast, XprType>& type;
-};
-
-template<typename Broadcast, typename XprType>
-struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
-{
- typedef TensorBroadcastingOp<Broadcast, XprType> type;
-};
-
-template <typename Dims>
-struct is_input_scalar {
- static const bool value = false;
-};
-template <>
-struct is_input_scalar<Sizes<> > {
- static const bool value = true;
-};
-#ifndef EIGEN_EMULATE_CXX11_META_H
-template <typename std::size_t... Indices>
-struct is_input_scalar<Sizes<Indices...> > {
- static const bool value = (Sizes<Indices...>::total_size == 1);
-};
-#endif
-
-} // end namespace internal
-
-
-
-template<typename Broadcast, typename XprType>
-class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
- : m_xpr(expr), m_broadcast(broadcast) {}
-
- EIGEN_DEVICE_FUNC
- const Broadcast& broadcast() const { return m_broadcast; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const Broadcast m_broadcast;
-};
-
-
-// Eval as rvalue
-template<typename Broadcast, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
-{
- typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = true,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
- {
- // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
- // and store the result in a scalar. Instead one should reshape the scalar into a a N-D
- // tensor with N >= 1 of 1 element first and then broadcast.
- EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
- const InputDimensions& input_dims = m_impl.dimensions();
- const Broadcast& broadcast = op.broadcast();
- for (int i = 0; i < NumDims; ++i) {
- eigen_assert(input_dims[i] > 0);
- m_dimensions[i] = input_dims[i] * broadcast[i];
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStrides[0] = 1;
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- }
- } else {
- m_inputStrides[NumDims-1] = 1;
- m_outputStrides[NumDims-1] = 1;
- for (int i = NumDims-2; i >= 0; --i) {
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
- {
- if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
- return m_impl.coeff(0);
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return coeffColMajor(index);
- } else {
- return coeffRowMajor(index);
- }
- }
-
- // TODO: attempt to speed this up. The integer divisions and modulo are slow
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
- {
- Index inputIndex = 0;
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
- if (internal::index_statically_eq<Broadcast>(i, 1)) {
- eigen_assert(idx < m_impl.dimensions()[i]);
- inputIndex += idx * m_inputStrides[i];
- } else {
- if (internal::index_statically_eq<InputDimensions>(i, 1)) {
- eigen_assert(idx % m_impl.dimensions()[i] == 0);
- } else {
- inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
- }
- }
- index -= idx * m_outputStrides[i];
- }
- if (internal::index_statically_eq<Broadcast>(0, 1)) {
- eigen_assert(index < m_impl.dimensions()[0]);
- inputIndex += index;
- } else {
- if (internal::index_statically_eq<InputDimensions>(0, 1)) {
- eigen_assert(index % m_impl.dimensions()[0] == 0);
- } else {
- inputIndex += (index % m_impl.dimensions()[0]);
- }
- }
- return m_impl.coeff(inputIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
- {
- Index inputIndex = 0;
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStrides[i];
- if (internal::index_statically_eq<Broadcast>(i, 1)) {
- eigen_assert(idx < m_impl.dimensions()[i]);
- inputIndex += idx * m_inputStrides[i];
- } else {
- if (internal::index_statically_eq<InputDimensions>(i, 1)) {
- eigen_assert(idx % m_impl.dimensions()[i] == 0);
- } else {
- inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
- }
- }
- index -= idx * m_outputStrides[i];
- }
- if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
- eigen_assert(index < m_impl.dimensions()[NumDims-1]);
- inputIndex += index;
- } else {
- if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
- eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
- } else {
- inputIndex += (index % m_impl.dimensions()[NumDims-1]);
- }
- }
- return m_impl.coeff(inputIndex);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
- {
- if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
- return internal::pset1<PacketReturnType>(m_impl.coeff(0));
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return packetColMajor<LoadMode>(index);
- } else {
- return packetRowMajor<LoadMode>(index);
- }
- }
-
- // Ignore the LoadMode and always use unaligned loads since we can't guarantee
- // the alignment at compile time.
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- const Index originalIndex = index;
-
- Index inputIndex = 0;
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
- if (internal::index_statically_eq<Broadcast>(i, 1)) {
- eigen_assert(idx < m_impl.dimensions()[i]);
- inputIndex += idx * m_inputStrides[i];
- } else {
- if (internal::index_statically_eq<InputDimensions>(i, 1)) {
- eigen_assert(idx % m_impl.dimensions()[i] == 0);
- } else {
- inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
- }
- }
- index -= idx * m_outputStrides[i];
- }
- Index innermostLoc;
- if (internal::index_statically_eq<Broadcast>(0, 1)) {
- eigen_assert(index < m_impl.dimensions()[0]);
- innermostLoc = index;
- } else {
- if (internal::index_statically_eq<InputDimensions>(0, 1)) {
- eigen_assert(index % m_impl.dimensions()[0] == 0);
- innermostLoc = 0;
- } else {
- innermostLoc = index % m_impl.dimensions()[0];
- }
- }
- inputIndex += innermostLoc;
-
- // Todo: this could be extended to the second dimension if we're not
- // broadcasting alongside the first dimension, and so on.
- if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
- return m_impl.template packet<Unaligned>(inputIndex);
- } else {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- values[0] = m_impl.coeff(inputIndex);
- for (int i = 1; i < PacketSize; ++i) {
- values[i] = coeffColMajor(originalIndex+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- const Index originalIndex = index;
-
- Index inputIndex = 0;
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStrides[i];
- if (internal::index_statically_eq<Broadcast>(i, 1)) {
- eigen_assert(idx < m_impl.dimensions()[i]);
- inputIndex += idx * m_inputStrides[i];
- } else {
- if (internal::index_statically_eq<InputDimensions>(i, 1)) {
- eigen_assert(idx % m_impl.dimensions()[i] == 0);
- } else {
- inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
- }
- }
- index -= idx * m_outputStrides[i];
- }
- Index innermostLoc;
- if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
- eigen_assert(index < m_impl.dimensions()[NumDims-1]);
- innermostLoc = index;
- } else {
- if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
- eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
- innermostLoc = 0;
- } else {
- innermostLoc = index % m_impl.dimensions()[NumDims-1];
- }
- }
- inputIndex += innermostLoc;
-
- // Todo: this could be extended to the second dimension if we're not
- // broadcasting alongside the first dimension, and so on.
- if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
- return m_impl.template packet<Unaligned>(inputIndex);
- } else {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- values[0] = m_impl.coeff(inputIndex);
- for (int i = 1; i < PacketSize; ++i) {
- values[i] = coeffRowMajor(originalIndex+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- double compute_cost = TensorOpCost::AddCost<Index>();
- if (NumDims > 0) {
- for (int i = NumDims - 1; i > 0; --i) {
- compute_cost += TensorOpCost::DivCost<Index>();
- if (internal::index_statically_eq<Broadcast>(i, 1)) {
- compute_cost +=
- TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
- } else {
- if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
- compute_cost += TensorOpCost::MulCost<Index>() +
- TensorOpCost::ModCost<Index>() +
- TensorOpCost::AddCost<Index>();
- }
- }
- compute_cost +=
- TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
- }
- }
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- Broadcast functor() const { return m_broadcast; }
-
- protected:
- const Broadcast m_broadcast;
- Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
deleted file mode 100644
index 1ba7ef1..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ /dev/null
@@ -1,384 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
-
-namespace Eigen {
-
-/** \class TensorKChippingReshaping
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor.
- *
- *
- */
-
-namespace internal {
-template<DenseIndex DimId, typename XprType>
-struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions - 1;
- static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex DimId, typename XprType>
-struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
-{
- typedef const TensorChippingOp<DimId, XprType>& type;
-};
-
-template<DenseIndex DimId, typename XprType>
-struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
-{
- typedef TensorChippingOp<DimId, XprType> type;
-};
-
-template <DenseIndex DimId>
-struct DimensionId
-{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
- eigen_assert(dim == DimId);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
- return DimId;
- }
-};
-template <>
-struct DimensionId<Dynamic>
-{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
- eigen_assert(dim >= 0);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
- return actual_dim;
- }
- private:
- const DenseIndex actual_dim;
-};
-
-
-} // end namespace internal
-
-
-
-template<DenseIndex DimId, typename XprType>
-class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
-{
- public:
- typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
- : m_xpr(expr), m_offset(offset), m_dim(dim) {
- }
-
- EIGEN_DEVICE_FUNC
- const Index offset() const { return m_offset; }
- EIGEN_DEVICE_FUNC
- const Index dim() const { return m_dim.actualDim(); }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
- {
- typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const Index m_offset;
- const internal::DimensionId<DimId> m_dim;
-};
-
-
-// Eval as rvalue
-template<DenseIndex DimId, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
-{
- typedef TensorChippingOp<DimId, ArgType> XprType;
- static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- static const int NumDims = NumInputDims-1;
- typedef typename XprType::Index Index;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
-
- enum {
- // Alignment can't be guaranteed at compile time since it depends on the
- // slice offsets.
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
- {
- EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
- eigen_assert(NumInputDims > m_dim.actualDim());
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);
-
- int j = 0;
- for (int i = 0; i < NumInputDims; ++i) {
- if (i != m_dim.actualDim()) {
- m_dimensions[j] = input_dims[i];
- ++j;
- }
- }
-
- m_stride = 1;
- m_inputStride = 1;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < m_dim.actualDim(); ++i) {
- m_stride *= input_dims[i];
- m_inputStride *= input_dims[i];
- }
- } else {
- for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
- m_stride *= input_dims[i];
- m_inputStride *= input_dims[i];
- }
- }
- m_inputStride *= input_dims[m_dim.actualDim()];
- m_inputOffset = m_stride * op.offset();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(srcCoeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
- // m_stride is equal to 1, so let's avoid the integer division.
- eigen_assert(m_stride == 1);
- Index inputIndex = index * m_inputStride + m_inputOffset;
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = m_impl.coeff(inputIndex);
- inputIndex += m_inputStride;
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
- eigen_assert(m_stride > index);
- return m_impl.template packet<LoadMode>(index + m_inputOffset);
- } else {
- const Index idx = index / m_stride;
- const Index rem = index - idx * m_stride;
- if (rem + PacketSize <= m_stride) {
- Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
- return m_impl.template packet<LoadMode>(inputIndex);
- } else {
- // Cross the stride boundary. Fallback to slow path.
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index);
- ++index;
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- double cost = 0;
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
- m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
- m_dim.actualDim() == NumInputDims - 1)) {
- cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
- m_dim.actualDim() == NumInputDims - 1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
- m_dim.actualDim() == 0)) {
- cost += TensorOpCost::AddCost<Index>();
- } else {
- cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
- 3 * TensorOpCost::AddCost<Index>();
- }
-
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
- CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
- if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
- result) {
- return result + m_inputOffset;
- } else {
- return NULL;
- }
- }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
- {
- Index inputIndex;
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
- // m_stride is equal to 1, so let's avoid the integer division.
- eigen_assert(m_stride == 1);
- inputIndex = index * m_inputStride + m_inputOffset;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
- eigen_assert(m_stride > index);
- inputIndex = index + m_inputOffset;
- } else {
- const Index idx = index / m_stride;
- inputIndex = idx * m_inputStride + m_inputOffset;
- index -= idx * m_stride;
- inputIndex += index;
- }
- return inputIndex;
- }
-
- Dimensions m_dimensions;
- Index m_stride;
- Index m_inputOffset;
- Index m_inputStride;
- TensorEvaluator<ArgType, Device> m_impl;
- const internal::DimensionId<DimId> m_dim;
- const Device& m_device;
-};
-
-
-// Eval as lvalue
-template<DenseIndex DimId, typename ArgType, typename Device>
-struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
- : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
-{
- typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
- typedef TensorChippingOp<DimId, ArgType> XprType;
- static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- static const int NumDims = NumInputDims-1;
- typedef typename XprType::Index Index;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(this->srcCoeff(index));
- }
-
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
-
- if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
- // m_stride is equal to 1, so let's avoid the integer division.
- eigen_assert(this->m_stride == 1);
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
- for (int i = 0; i < PacketSize; ++i) {
- this->m_impl.coeffRef(inputIndex) = values[i];
- inputIndex += this->m_inputStride;
- }
- } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
- eigen_assert(this->m_stride > index);
- this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
- } else {
- const Index idx = index / this->m_stride;
- const Index rem = index - idx * this->m_stride;
- if (rem + PacketSize <= this->m_stride) {
- const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
- this->m_impl.template writePacket<StoreMode>(inputIndex, x);
- } else {
- // Cross stride boundary. Fallback to slow path.
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- for (int i = 0; i < PacketSize; ++i) {
- this->coeffRef(index) = values[i];
- ++index;
- }
- }
- }
- }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
deleted file mode 100644
index 59bf90d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h
+++ /dev/null
@@ -1,361 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
-
-namespace Eigen {
-
-/** \class TensorConcatenationOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor concatenation class.
- *
- *
- */
-namespace internal {
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename promote_storage_type<typename LhsXprType::Scalar,
- typename RhsXprType::Scalar>::ret Scalar;
- typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
- typename traits<RhsXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<LhsXprType>::Index,
- typename traits<RhsXprType>::Index>::type Index;
- typedef typename LhsXprType::Nested LhsNested;
- typedef typename RhsXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const int NumDimensions = traits<LhsXprType>::NumDimensions;
- static const int Layout = traits<LhsXprType>::Layout;
- enum { Flags = 0 };
-};
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense>
-{
- typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type;
-};
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type>
-{
- typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type;
-};
-
-} // end namespace internal
-
-
-template<typename Axis, typename LhsXprType, typename RhsXprType>
-class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors>
-{
- public:
- typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar;
- typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind;
- typedef typename internal::traits<TensorConcatenationOp>::Index Index;
- typedef typename internal::nested<TensorConcatenationOp>::type Nested;
- typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
- typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis)
- : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename LhsXprType::Nested>::type&
- lhsExpression() const { return m_lhs_xpr; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename RhsXprType::Nested>::type&
- rhsExpression() const { return m_rhs_xpr; }
-
- EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other)
- {
- typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename LhsXprType::Nested m_lhs_xpr;
- typename RhsXprType::Nested m_rhs_xpr;
- const Axis m_axis;
-};
-
-
-// Eval as rvalue
-template<typename Axis, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
-{
- typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
- static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis())
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- eigen_assert(0 <= m_axis && m_axis < NumDims);
- const Dimensions& lhs_dims = m_leftImpl.dimensions();
- const Dimensions& rhs_dims = m_rightImpl.dimensions();
- {
- int i = 0;
- for (; i < m_axis; ++i) {
- eigen_assert(lhs_dims[i] > 0);
- eigen_assert(lhs_dims[i] == rhs_dims[i]);
- m_dimensions[i] = lhs_dims[i];
- }
- eigen_assert(lhs_dims[i] > 0); // Now i == m_axis.
- eigen_assert(rhs_dims[i] > 0);
- m_dimensions[i] = lhs_dims[i] + rhs_dims[i];
- for (++i; i < NumDims; ++i) {
- eigen_assert(lhs_dims[i] > 0);
- eigen_assert(lhs_dims[i] == rhs_dims[i]);
- m_dimensions[i] = lhs_dims[i];
- }
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_leftStrides[0] = 1;
- m_rightStrides[0] = 1;
- m_outputStrides[0] = 1;
-
- for (int j = 1; j < NumDims; ++j) {
- m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1];
- m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1];
- m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1];
- }
- } else {
- m_leftStrides[NumDims - 1] = 1;
- m_rightStrides[NumDims - 1] = 1;
- m_outputStrides[NumDims - 1] = 1;
-
- for (int j = NumDims - 2; j >= 0; --j) {
- m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1];
- m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1];
- m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
- {
- m_leftImpl.evalSubExprsIfNeeded(NULL);
- m_rightImpl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
- {
- m_leftImpl.cleanup();
- m_rightImpl.cleanup();
- }
-
- // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow.
- // See CL/76180724 comments for more ideas.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- // Collect dimension-wise indices (subs).
- array<Index, NumDims> subs;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- subs[i] = index / m_outputStrides[i];
- index -= subs[i] * m_outputStrides[i];
- }
- subs[0] = index;
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- subs[i] = index / m_outputStrides[i];
- index -= subs[i] * m_outputStrides[i];
- }
- subs[NumDims - 1] = index;
- }
-
- const Dimensions& left_dims = m_leftImpl.dimensions();
- if (subs[m_axis] < left_dims[m_axis]) {
- Index left_index;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- left_index = subs[0];
- for (int i = 1; i < NumDims; ++i) {
- left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
- }
- } else {
- left_index = subs[NumDims - 1];
- for (int i = NumDims - 2; i >= 0; --i) {
- left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
- }
- }
- return m_leftImpl.coeff(left_index);
- } else {
- subs[m_axis] -= left_dims[m_axis];
- const Dimensions& right_dims = m_rightImpl.dimensions();
- Index right_index;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- right_index = subs[0];
- for (int i = 1; i < NumDims; ++i) {
- right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
- }
- } else {
- right_index = subs[NumDims - 1];
- for (int i = NumDims - 2; i >= 0; --i) {
- right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
- }
- }
- return m_rightImpl.coeff(right_index);
- }
- }
-
- // TODO(phli): Add a real vectorization.
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
- EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
-
- EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
- for (int i = 0; i < packetSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
- 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>() +
- TensorOpCost::ModCost<Index>());
- const double lhs_size = m_leftImpl.dimensions().TotalSize();
- const double rhs_size = m_rightImpl.dimensions().TotalSize();
- return (lhs_size / (lhs_size + rhs_size)) *
- m_leftImpl.costPerCoeff(vectorized) +
- (rhs_size / (lhs_size + rhs_size)) *
- m_rightImpl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims> m_leftStrides;
- array<Index, NumDims> m_rightStrides;
- TensorEvaluator<LeftArgType, Device> m_leftImpl;
- TensorEvaluator<RightArgType, Device> m_rightImpl;
- const Axis m_axis;
-};
-
-// Eval as lvalue
-template<typename Axis, typename LeftArgType, typename RightArgType, typename Device>
- struct TensorEvaluator<TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
- : public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
-{
- typedef TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device> Base;
- typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
- typedef typename Base::Dimensions Dimensions;
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device)
- : Base(op, device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(Layout) == static_cast<int>(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE);
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- // Collect dimension-wise indices (subs).
- array<Index, Base::NumDims> subs;
- for (int i = Base::NumDims - 1; i > 0; --i) {
- subs[i] = index / this->m_outputStrides[i];
- index -= subs[i] * this->m_outputStrides[i];
- }
- subs[0] = index;
-
- const Dimensions& left_dims = this->m_leftImpl.dimensions();
- if (subs[this->m_axis] < left_dims[this->m_axis]) {
- Index left_index = subs[0];
- for (int i = 1; i < Base::NumDims; ++i) {
- left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i];
- }
- return this->m_leftImpl.coeffRef(left_index);
- } else {
- subs[this->m_axis] -= left_dims[this->m_axis];
- const Dimensions& right_dims = this->m_rightImpl.dimensions();
- Index right_index = subs[0];
- for (int i = 1; i < Base::NumDims; ++i) {
- right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i];
- }
- return this->m_rightImpl.coeffRef(right_index);
- }
- }
-
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
- EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
-
- EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- for (int i = 0; i < packetSize; ++i) {
- coeffRef(index+i) = values[i];
- }
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
deleted file mode 100644
index 20b29e5..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ /dev/null
@@ -1,628 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
-
-namespace Eigen {
-
-/** \class TensorContraction
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor contraction class.
- *
- *
- */
-namespace internal {
-
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
-{
-  // Type promotion to handle the case where the types of the lhs and the rhs are different.
-  typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type,
-                               typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar;
-
-  // Storage kind and index type are promoted from the two operands.
-  typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
-                                        typename traits<RhsXprType>::StorageKind>::ret StorageKind;
-  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
-                                      typename traits<RhsXprType>::Index>::type Index;
-  typedef typename LhsXprType::Nested LhsNested;
-  typedef typename RhsXprType::Nested RhsNested;
-  typedef typename remove_reference<LhsNested>::type _LhsNested;
-  typedef typename remove_reference<RhsNested>::type _RhsNested;
-
-  // Rank of the result: every dimension of both operands survives except the
-  // contracted ones, and each contracted pair removes one dimension from each
-  // side. This must use the lhs rank plus the rhs rank (the same formula as
-  // NumDims in the evaluator); counting the rhs rank twice gives a wrong
-  // result whenever the two operands have different ranks.
-  static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;
-  // The result uses the layout of the left operand.
-  static const int Layout = traits<LhsXprType>::Layout;
-
-  enum {
-    Flags = 0
-  };
-};
-
-// eval specialization for dense contraction expressions: `type` is a const
-// reference to the contraction expression itself.
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, Eigen::Dense>
-{
- typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType>& type;
-};
-
-// nested specialization: `type` is the contraction expression by value (no
-// reference), which is what TensorContractionOp::Nested resolves to.
-template<typename Dimensions, typename LhsXprType, typename RhsXprType>
-struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >::type>
-{
- typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType> type;
-};
-
-// Traits of the contraction evaluator. They re-export the evaluator's template
-// arguments under fixed names so that TensorContractionEvaluatorBase<Derived>
-// can recover them from the Derived type alone.
-template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename Device_>
-struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_>, Device_> > {
- typedef Indices_ Indices;
- typedef LeftArgType_ LeftArgType;
- typedef RightArgType_ RightArgType;
- typedef Device_ Device;
-
- // From NumDims below.
- // Rank of the result: lhs rank + rhs rank minus two per contracted index pair.
- static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;
-};
-
-} // end namespace internal
-
-// Expression node describing the contraction of two tensor expressions along
-// a list of index pairs. It only stores its operands and the index pairs; the
-// computation itself is carried out by the matching TensorEvaluator.
-template<typename Indices, typename LhsXprType, typename RhsXprType>
-class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType>, ReadOnlyAccessors>
-{
-  public:
-    typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar;
-    typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind;
-    typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index;
-    typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested;
-    // Result scalar obtained by promoting the coefficient types of both operands.
-    typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType,
-                                           typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType;
-
-    // Builds the node from the two operands and the pairs of axes to contract.
-    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp(
-        const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims)
-        : m_lhs_xpr(lhs),
-          m_rhs_xpr(rhs),
-          m_indices(dims)
-    {
-    }
-
-    /** \returns the pairs of contracted axes */
-    EIGEN_DEVICE_FUNC
-    const Indices& indices() const
-    {
-      return m_indices;
-    }
-
-    /** \returns the left-hand nested expression */
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename LhsXprType::Nested>::type&
-    lhsExpression() const
-    {
-      return m_lhs_xpr;
-    }
-
-    /** \returns the right-hand nested expression */
-    EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename RhsXprType::Nested>::type&
-    rhsExpression() const
-    {
-      return m_rhs_xpr;
-    }
-
-  protected:
-    typename LhsXprType::Nested m_lhs_xpr;
-    typename RhsXprType::Nested m_rhs_xpr;
-    const Indices m_indices;
-};
-
-
-// CRTP base class shared by the contraction evaluators.
-//
-// internal::traits<Derived> must provide Indices, LeftArgType, RightArgType and
-// Device, and Derived must provide
-//   template <bool, bool, bool, int> void evalProduct(Scalar* buffer) const;
-// which evalTo() invokes after turning the runtime contiguity/reordering flags
-// into template arguments.
-//
-// The constructor flattens the contraction into a matrix product of sizes
-// m_i_size x m_k_size (left) by m_k_size x m_j_size (right) and records the
-// strides the input mappers need to address the original tensors.
-template<typename Derived>
-struct TensorContractionEvaluatorBase
-{
- typedef typename internal::traits<Derived>::Indices Indices;
- typedef typename internal::traits<Derived>::LeftArgType LeftArgType;
- typedef typename internal::traits<Derived>::RightArgType RightArgType;
- typedef typename internal::traits<Derived>::Device Device;
-
- typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef typename XprType::Index Index;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- enum {
- IsAligned = true,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = true
- };
-
- // Most of the code is assuming that both input tensors are ColMajor. If the
- // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
- // If we want to compute A * B = C, where A is LHS and B is RHS, the code
- // will pretend B is LHS and A is RHS.
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
- // Ranks of the (possibly swapped) operands, number of contracted index
- // pairs, and rank of the result.
- static const int LDims =
- internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
- static const int RDims =
- internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
- static const int ContractDims = internal::array_size<Indices>::value;
- static const int NumDims = LDims + RDims - 2 * ContractDims;
-
- typedef array<Index, ContractDims> contract_t;
- typedef array<Index, LDims - ContractDims> left_nocontract_t;
- typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
- typedef DSizes<Index, NumDims> Dimensions;
-
- // Builds the operand evaluators (swapped for RowMajor) and precomputes the
- // output dimensions, the i/j/k sizes and all stride tables. No result buffer
- // is allocated here: m_result starts out NULL.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorContractionEvaluatorBase(const XprType& op, const Device& device)
- : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
- op.lhsExpression(), op.rhsExpression()), device),
- m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
- op.rhsExpression(), op.lhsExpression()), device),
- m_device(device),
- m_result(NULL) {
- // Both operands are required to use the same layout.
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
- static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
- YOU_MADE_A_PROGRAMMING_MISTAKE);
-
-
- DSizes<Index, LDims> eval_left_dims;
- DSizes<Index, RDims> eval_right_dims;
- array<IndexPair<Index>, ContractDims> eval_op_indices;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- // For ColMajor, we keep using the existing dimensions
- for (int i = 0; i < LDims; i++) {
- eval_left_dims[i] = m_leftImpl.dimensions()[i];
- }
- for (int i = 0; i < RDims; i++) {
- eval_right_dims[i] = m_rightImpl.dimensions()[i];
- }
- // We keep the pairs of contracting indices.
- for (int i = 0; i < ContractDims; i++) {
- eval_op_indices[i].first = op.indices()[i].first;
- eval_op_indices[i].second = op.indices()[i].second;
- }
- } else {
- // For RowMajor, we need to reverse the existing dimensions
- for (int i = 0; i < LDims; i++) {
- eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1];
- }
- for (int i = 0; i < RDims; i++) {
- eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1];
- }
- // We need to flip all the pairs of contracting indices as well as
- // reversing the dimensions.
- for (int i = 0; i < ContractDims; i++) {
- eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second;
- eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first;
- }
- }
-
- // Check for duplicate axes and make sure the first index in eval_op_indices
- // is increasing. Using O(n^2) sorting is OK since ContractDims is small
- for (int i = 0; i < ContractDims; i++) {
- for (int j = i + 1; j < ContractDims; j++) {
- eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first &&
- eval_op_indices[j].second != eval_op_indices[i].second &&
- "contraction axes should be unique");
- if (eval_op_indices[j].first < eval_op_indices[i].first) {
- numext::swap(eval_op_indices[j], eval_op_indices[i]);
- }
- }
- }
-
- // Strides of each operand: stride 1 on dimension 0, each following stride
- // is the previous stride times the previous dimension size.
- array<Index, LDims> lhs_strides;
- lhs_strides[0] = 1;
- for (int i = 0; i < LDims-1; ++i) {
- lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i];
- }
-
- array<Index, RDims> rhs_strides;
- rhs_strides[0] = 1;
- for (int i = 0; i < RDims-1; ++i) {
- rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i];
- }
-
- // The stride arrays may have size zero (e.g. when every dimension of one
- // operand is contracted), hence the guards before writing element 0.
- if (m_i_strides.size() > 0) m_i_strides[0] = 1;
- if (m_j_strides.size() > 0) m_j_strides[0] = 1;
- if (m_k_strides.size() > 0) m_k_strides[0] = 1;
-
- m_i_size = 1;
- m_j_size = 1;
- m_k_size = 1;
-
- // To compute the dimension, we simply concatenate the non-contracting
- // dimensions of the left and then the right tensor. Additionally, we also
- // compute the strides corresponding to the left non-contracting
- // dimensions and right non-contracting dimensions.
- m_lhs_inner_dim_contiguous = true;
- int dim_idx = 0;
- unsigned int nocontract_idx = 0;
-
- for (int i = 0; i < LDims; i++) {
- // find if we are contracting on index i of left tensor
- bool contracting = false;
- for (int j = 0; j < ContractDims; j++) {
- if (eval_op_indices[j].first == i) {
- contracting = true;
- break;
- }
- }
- if (!contracting) {
- // add dimension size to output dimensions
- m_dimensions[dim_idx] = eval_left_dims[i];
- m_left_nocontract_strides[nocontract_idx] = lhs_strides[i];
- // The flag is cleared as soon as a kept left dimension does not sit at
- // the same position in the output, i.e. a contracted dimension precedes it.
- if (dim_idx != i) {
- m_lhs_inner_dim_contiguous = false;
- }
- // Either extend the running product into the next i-stride, or, for the
- // last kept dimension, store it as the total number of rows.
- if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) {
- m_i_strides[nocontract_idx+1] =
- m_i_strides[nocontract_idx] * eval_left_dims[i];
- } else {
- m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i];
- }
- dim_idx++;
- nocontract_idx++;
- }
- }
-
- // Same bookkeeping for the right operand; dim_idx keeps counting so the
- // right operand's kept dimensions follow the left operand's in m_dimensions.
- nocontract_idx = 0;
- for (int i = 0; i < RDims; i++) {
- bool contracting = false;
- // find if we are contracting on index i of right tensor
- for (int j = 0; j < ContractDims; j++) {
- if (eval_op_indices[j].second == i) {
- contracting = true;
- break;
- }
- }
- if (!contracting) {
- m_dimensions[dim_idx] = eval_right_dims[i];
- if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) {
- m_j_strides[nocontract_idx+1] =
- m_j_strides[nocontract_idx] * eval_right_dims[i];
- } else {
- m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i];
- }
- m_right_nocontract_strides[nocontract_idx] = rhs_strides[i];
- dim_idx++;
- nocontract_idx++;
- }
- }
-
- // Now compute the strides corresponding to the contracting dimensions. We
- // assumed above that non-contracting axes are represented in the same order
- // in the matrix as they are in the tensor. This is not the case for
- // contracting axes. As the contracting axes must be of the same size in
- // each tensor, we'll only look at the first tensor here.
- m_rhs_inner_dim_contiguous = true;
- m_rhs_inner_dim_reordered = false;
- for (int i = 0; i < ContractDims; i++) {
- Index left = eval_op_indices[i].first;
- Index right = eval_op_indices[i].second;
-
- Index size = eval_left_dims[left];
- eigen_assert(size == eval_right_dims[right] &&
- "Contraction axes must be same size");
-
- if (i+1 < static_cast<int>(internal::array_size<contract_t>::value)) {
- m_k_strides[i+1] = m_k_strides[i] * size;
- } else {
- m_k_size = m_k_strides[i] * size;
- }
- m_left_contracting_strides[i] = lhs_strides[left];
- m_right_contracting_strides[i] = rhs_strides[right];
-
- // The pairs are sorted by their left axis, so a decreasing right axis
- // means the right operand's contracted axes are visited out of order.
- if (i > 0 && right < eval_op_indices[i-1].second) {
- m_rhs_inner_dim_reordered = true;
- }
- // Contiguous only if the i-th contracted axis of the right operand is
- // its i-th dimension.
- if (right != i) {
- m_rhs_inner_dim_contiguous = false;
- }
- }
-
- // If the layout is RowMajor, we need to reverse the m_dimensions
- if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) {
- for (int i = 0, j = NumDims - 1; i < j; i++, j--) {
- numext::swap(m_dimensions[i], m_dimensions[j]);
- }
- }
- }
-
- // Dimensions of the contraction result.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- // Evaluates both operands, then the contraction itself. When `data` is
- // non-null the result is written there and false is returned; otherwise a
- // buffer is allocated through the device, kept in m_result (released by
- // cleanup()), and true is returned.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
- m_leftImpl.evalSubExprsIfNeeded(NULL);
- m_rightImpl.evalSubExprsIfNeeded(NULL);
- if (data) {
- evalTo(data);
- return false;
- } else {
- m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
- evalTo(m_result);
- return true;
- }
- }
-
- // Turns the three runtime flags computed by the constructor into template
- // arguments and forwards to Derived::evalProduct. The template arguments are,
- // in order: lhs_inner_dim_contiguous, rhs_inner_dim_contiguous,
- // rhs_inner_dim_reordered; the alignment argument is always Unaligned.
- EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
- if (this->m_lhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer);
- }
- }
- }
- else {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer);
- }
- else {
- static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer);
- }
- }
- }
- }
-
- // Matrix-vector path: zeroes `buffer` (m_i_size scalars) and runs
- // general_matrix_vector_product on an m_i_size x m_k_size left mapper and a
- // right mapper, with alpha = 1 and unit result increment. The Alignment
- // template argument is not used in this body.
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const {
- const Index rows = m_i_size;
- const Index cols = m_k_size;
-
- typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
- typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
- typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
- typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
- const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;
- const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size;
- const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned;
- const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned;
- typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
- LeftEvaluator, left_nocontract_t,
- contract_t, lhs_packet_size,
- lhs_inner_dim_contiguous,
- false, lhs_alignment> LhsMapper;
-
- typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
- RightEvaluator, right_nocontract_t,
- contract_t, rhs_packet_size,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, rhs_alignment> RhsMapper;
-
- LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides,
- m_left_contracting_strides, m_k_strides);
- RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides,
- m_right_contracting_strides, m_k_strides);
-
- const Scalar alpha(1);
- const Index resIncr(1);
-
- // zero out the result buffer (which must be of size at least rows * sizeof(Scalar))
- m_device.memset(buffer, 0, rows * sizeof(Scalar));
-
- internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(
- rows, cols, lhs, rhs,
- buffer, resIncr, alpha);
- }
-
- // Matrix-matrix path: zeroes `buffer` (m x n scalars), then runs a blocked
- // product. Panels of the left and right operands are packed into two
- // temporary device buffers and multiplied by the gebp kernel, which is
- // called with alpha = 1 on the zeroed output. The Alignment template
- // argument is not used in this body; both mappers are declared Unaligned.
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const {
- // columns in left side, rows in right side
- const Index k = this->m_k_size;
-
- // rows in left side
- const Index m = this->m_i_size;
-
- // columns in right side
- const Index n = this->m_j_size;
-
- // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar))
- this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
- // define mr, nr, and all of my data mapper types
- typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
- typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
- typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
-
- const Index nr = Traits::nr;
- const Index mr = Traits::mr;
-
- typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
- typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
- const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;
- const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size;
-
- typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
- LeftEvaluator, left_nocontract_t,
- contract_t, lhs_packet_size,
- lhs_inner_dim_contiguous,
- false, Unaligned> LhsMapper;
-
- typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
- RightEvaluator, right_nocontract_t,
- contract_t, rhs_packet_size,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
- typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
- // Declare GEBP packing and kernel structs
- internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, mr, Traits::LhsProgress, ColMajor> pack_lhs;
- internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, nr, ColMajor> pack_rhs;
-
- internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper, mr, nr, false, false> gebp;
-
- // initialize data mappers
- LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
- this->m_left_contracting_strides, this->m_k_strides);
-
- RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
- this->m_right_contracting_strides, this->m_k_strides);
-
- // The output is addressed as a column-major m x n matrix with stride m.
- OutputMapper output(buffer, m);
-
- // Sizes of the blocks to load in cache. See the Goto paper for details.
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
- const Index kc = blocking.kc();
- const Index mc = numext::mini(m, blocking.mc());
- const Index nc = numext::mini(n, blocking.nc());
- const Index sizeA = mc * kc;
- const Index sizeB = kc * nc;
-
- // Scratch buffers for one packed block of each operand; released below.
- LhsScalar* blockA = static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar)));
- RhsScalar* blockB = static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar)));
-
- for(Index i2=0; i2<m; i2+=mc)
- {
- // clamp the last block of rows to the matrix edge
- const Index actual_mc = numext::mini(i2+mc,m)-i2;
- for (Index k2 = 0; k2 < k; k2 += kc) {
- // make sure we don't overshoot right edge of left matrix, then pack vertical panel
- const Index actual_kc = numext::mini(k2 + kc, k) - k2;
- pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc, 0, 0);
-
- // series of horizontal blocks
- for (Index j2 = 0; j2 < n; j2 += nc) {
- // make sure we don't overshoot right edge of right matrix, then pack block
- const Index actual_nc = numext::mini(j2 + nc, n) - j2;
- pack_rhs(blockB, rhs.getSubMapper(k2, j2), actual_kc, actual_nc, 0, 0);
-
- // call gebp (matrix kernel)
- // The parameters here are copied from Eigen's GEMM implementation
- gebp(output.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0);
- }
- }
- }
-
- this->m_device.deallocate(blockA);
- this->m_device.deallocate(blockB);
- }
-
- // Cleans up both operand evaluators and releases the result buffer if
- // evalSubExprsIfNeeded() allocated one.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_leftImpl.cleanup();
- m_rightImpl.cleanup();
-
- if (m_result != NULL) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- }
-
- // Reads one coefficient from the internally held result buffer. m_result is
- // only set by evalSubExprsIfNeeded() when it is called with a null pointer.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- return m_result[index];
- }
-
- // The only non-zero cost argument is sizeof(CoeffReturnType), passed first.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
- }
-
- // Loads one packet from the internally held result buffer.
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
- return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
- }
-
- // Raw pointer to the internally held result buffer (NULL if none was allocated).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; }
-
- protected:
- // Prevent assignment
- TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&);
- Dimensions m_dimensions;
-
- // Strides over the contracted axes: within the flattened k index, and within
- // the left and right operands respectively.
- contract_t m_k_strides;
- contract_t m_left_contracting_strides;
- contract_t m_right_contracting_strides;
-
- // Flags computed by the constructor and dispatched on by evalTo().
- bool m_lhs_inner_dim_contiguous;
- bool m_rhs_inner_dim_contiguous;
- bool m_rhs_inner_dim_reordered;
-
- // Strides over the kept (non-contracted) axes: within the flattened i/j
- // indices, and within the left and right operands respectively.
- left_nocontract_t m_i_strides;
- right_nocontract_t m_j_strides;
- left_nocontract_t m_left_nocontract_strides;
- right_nocontract_t m_right_nocontract_strides;
-
- // Sizes of the flattened product: rows of the left side, columns of the
- // right side, and the shared contracted extent.
- Index m_i_size;
- Index m_j_size;
- Index m_k_size;
-
- TensorEvaluator<EvalLeftArgType, Device> m_leftImpl;
- TensorEvaluator<EvalRightArgType, Device> m_rightImpl;
- const Device& m_device;
- // Result buffer owned by this evaluator; NULL unless allocated in
- // evalSubExprsIfNeeded().
- Scalar* m_result;
-};
-
-
-// evaluator for default device
-template<typename Indices, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> :
-    public TensorContractionEvaluatorBase<
-      TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> > {
-  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
-  typedef TensorContractionEvaluatorBase<Self> Base;
-
-  typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
-  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
-  typedef typename XprType::Index Index;
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
-  enum {
-    Layout = TensorEvaluator<LeftArgType, Device>::Layout
-  };
-
-  // The shared implementation is written for ColMajor operands. RowMajor
-  // inputs are handled by exchanging the two operands: to compute
-  // A * B = C with A as LHS and B as RHS, the code treats B as the LHS and
-  // A as the RHS.
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor),
-    LeftArgType, RightArgType>::type EvalLeftArgType;
-  typedef typename internal::conditional<
-    static_cast<int>(Layout) == static_cast<int>(ColMajor),
-    RightArgType, LeftArgType>::type EvalRightArgType;
-
-  static const int LDims =
-      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
-  static const int RDims =
-      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
-  static const int ContractDims = internal::array_size<Indices>::value;
-
-  typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, LDims - ContractDims> left_nocontract_t;
-  typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
-  static const int NumDims = LDims + RDims - 2 * ContractDims;
-
-  // Could we use NumDimensions here?
-  typedef DSizes<Index, NumDims> Dimensions;
-
-  // All setup is done by the base class constructor.
-  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
-      : Base(op, device)
-  {
-  }
-
-  // Entry point invoked by Base::evalTo(). A right-hand side with a single
-  // column is evaluated as a matrix-vector product, anything else as a
-  // general matrix-matrix product.
-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const {
-    if (this->m_j_size != 1) {
-      this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-    } else {
-      this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
-    }
-  }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
deleted file mode 100644
index 5cf7b4f..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H
-
-
-namespace Eigen {
-namespace internal {
-
-// Sharding modes accepted by TensorContractionBlocking's ShardingType
-// parameter; they select which of the m/n block sizes is passed first to
-// computeProductBlockingSizes.
-enum {
- ShardByRow = 0,
- ShardByCol = 1
-};
-
-
-// Default Blocking Strategy
-template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol>
-class TensorContractionBlocking {
- public:
-
-  typedef typename LhsMapper::Scalar LhsScalar;
-  typedef typename RhsMapper::Scalar RhsScalar;
-
-  // Starts from the full problem sizes (k, m, n) and hands them to
-  // computeProductBlockingSizes, which receives them by name and may adjust
-  // them. The results are then available through kc(), mc() and nc().
-  EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1)
-      : kc_(k),
-        mc_(m),
-        nc_(n)
-  {
-    if (ShardingType != ShardByCol) {
-      // Any other sharding mode: m and n exchange places in the call.
-      computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads);
-    } else {
-      computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads);
-    }
-  }
-
-  // Block sizes along the contracted (k), row (m) and column (n) directions.
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
-
- private:
-  Index kc_;
-  Index mc_;
-  Index nc_;
-};
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
deleted file mode 100644
index d65dbb4..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ /dev/null
@@ -1,1391 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-// Copyright (C) 2015 Navdeep Jaitly <ndjaitly@google.com>
-// Copyright (C) 2014 Eric Martin <eric@ericmart.in>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-
-namespace Eigen {
-
-template<typename Scalar, typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper, bool needs_edge_check>
-__device__ EIGEN_STRONG_INLINE void
-EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem,
- const Index m_size, const Index n_size, const Index k_size) {
-
- const Index m_block_idx = blockIdx.x;
- const Index n_block_idx = blockIdx.y;
-
- const Index base_m = 64 * m_block_idx;
- const Index base_n = 64 * n_block_idx;
-
- // declare and initialize 64 registers for output 8x8 block
-
- // prefetch registers
- Scalar lhs_pf0;
- Scalar lhs_pf1;
- Scalar lhs_pf2;
- Scalar lhs_pf3;
- Scalar lhs_pf4;
- Scalar lhs_pf5;
- Scalar lhs_pf6;
- Scalar lhs_pf7;
-
- Scalar rhs_pf0;
- Scalar rhs_pf1;
- Scalar rhs_pf2;
- Scalar rhs_pf3;
- Scalar rhs_pf4;
- Scalar rhs_pf5;
- Scalar rhs_pf6;
- Scalar rhs_pf7;
-
- // shared memory is formatted
- // (contract idx in block, nocontract idx in block, block idx)
- // where block idx is column major. This transposition limits the number of
- // bank conflicts when reading the LHS. The core idea is that since the contracting
- // index is shared by both sides, then the contracting index should be in threadIdx.x.
-
- // On the LHS, we pad each row inside of each block with an extra element. This makes
- // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts
- // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks.
-
- // On the RHS we just add 8 padding elements to the end of each block. This gives no bank
- // conflicts on writes and also none on reads.
-
- // storage indices
- const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z;
- const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x;
-
- const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0;
- const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1;
- const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2;
- const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3;
- const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4;
- const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5;
- const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6;
- const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7;
-
- const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0;
- const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1;
- const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2;
- const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3;
- const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4;
- const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5;
- const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6;
- const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7;
-
- // in the loading code, the following variables are important:
- // threadIdx.x: the vertical position in an 8x8 block
- // threadIdx.y: the vertical index of the 8x8 block in the grid
- // threadIdx.z: the horizontal position in an 8x8 block
- // k: the horizontal index of the 8x8 block in the grid
- //
- // The k parameter is implicit (it was the loop counter for a loop that went
- // from 0 to <8, but now that loop is unrolled in the below code.
-
- const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y;
- const Index lhs_vert = base_m + load_idx_vert;
-
-#define prefetchIntoRegisters(base_k) \
- { \
- lhs_pf0 = conv(0); \
- lhs_pf1 = conv(0); \
- lhs_pf2 = conv(0); \
- lhs_pf3 = conv(0); \
- lhs_pf4 = conv(0); \
- lhs_pf5 = conv(0); \
- lhs_pf6 = conv(0); \
- lhs_pf7 = conv(0); \
- \
- rhs_pf0 = conv(0); \
- rhs_pf1 = conv(0); \
- rhs_pf2 = conv(0); \
- rhs_pf3 = conv(0); \
- rhs_pf4 = conv(0); \
- rhs_pf5 = conv(0); \
- rhs_pf6 = conv(0); \
- rhs_pf7 = conv(0); \
- \
- if (!needs_edge_check || lhs_vert < m_size) { \
- const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \
- const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \
- const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \
- const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \
- const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \
- const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \
- const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \
- const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \
- \
- if (!needs_edge_check || lhs_horiz_7 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \
- lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \
- lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \
- lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \
- lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \
- } else if (lhs_horiz_6 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \
- lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \
- lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \
- lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \
- } else if (lhs_horiz_5 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \
- lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \
- lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \
- } else if (lhs_horiz_4 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \
- lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \
- } else if (lhs_horiz_3 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \
- } else if (lhs_horiz_2 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \
- } else if (lhs_horiz_1 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \
- } else if (lhs_horiz_0 < k_size) { \
- lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \
- } \
- } \
- \
- const Index rhs_vert = base_k + load_idx_vert; \
- if (!needs_edge_check || rhs_vert < k_size) { \
- const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \
- const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \
- const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \
- const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \
- const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \
- const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \
- const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \
- const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \
- \
- if (rhs_horiz_7 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \
- rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \
- rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \
- rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \
- rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \
- } else if (rhs_horiz_6 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \
- rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \
- rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \
- rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \
- } else if (rhs_horiz_5 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \
- rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \
- rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \
- } else if (rhs_horiz_4 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \
- rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \
- } else if (rhs_horiz_3 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \
- } else if (rhs_horiz_2 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \
- } else if (rhs_horiz_1 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \
- } else if (rhs_horiz_0 < n_size) { \
- rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \
- } \
- } \
- } \
-
-#define writeRegToShmem(_) \
- lhs_shmem[lhs_store_idx_0] = lhs_pf0; \
- rhs_shmem[rhs_store_idx_0] = rhs_pf0; \
- \
- lhs_shmem[lhs_store_idx_1] = lhs_pf1; \
- rhs_shmem[rhs_store_idx_1] = rhs_pf1; \
- \
- lhs_shmem[lhs_store_idx_2] = lhs_pf2; \
- rhs_shmem[rhs_store_idx_2] = rhs_pf2; \
- \
- lhs_shmem[lhs_store_idx_3] = lhs_pf3; \
- rhs_shmem[rhs_store_idx_3] = rhs_pf3; \
- \
- lhs_shmem[lhs_store_idx_4] = lhs_pf4; \
- rhs_shmem[rhs_store_idx_4] = rhs_pf4; \
- \
- lhs_shmem[lhs_store_idx_5] = lhs_pf5; \
- rhs_shmem[rhs_store_idx_5] = rhs_pf5; \
- \
- lhs_shmem[lhs_store_idx_6] = lhs_pf6; \
- rhs_shmem[rhs_store_idx_6] = rhs_pf6; \
- \
- lhs_shmem[lhs_store_idx_7] = lhs_pf7; \
- rhs_shmem[rhs_store_idx_7] = rhs_pf7; \
-
- // declare and initialize result array
-#define res(i, j) _res_##i##j
-#define initResultRow(i) \
- Scalar res(i, 0) = conv(0); \
- Scalar res(i, 1) = conv(0); \
- Scalar res(i, 2) = conv(0); \
- Scalar res(i, 3) = conv(0); \
- Scalar res(i, 4) = conv(0); \
- Scalar res(i, 5) = conv(0); \
- Scalar res(i, 6) = conv(0); \
- Scalar res(i, 7) = conv(0); \
-
- internal::scalar_cast_op<int, Scalar> conv;
- initResultRow(0);
- initResultRow(1);
- initResultRow(2);
- initResultRow(3);
- initResultRow(4);
- initResultRow(5);
- initResultRow(6);
- initResultRow(7);
-#undef initResultRow
-
- for (Index base_k = 0; base_k < k_size; base_k += 64) {
- // wait for previous iteration to finish with shmem. Despite common sense,
- // the code is a bit faster with this here then at bottom of loop
- __syncthreads();
-
- prefetchIntoRegisters(base_k);
- writeRegToShmem();
-
- #undef prefetchIntoRegisters
- #undef writeRegToShmem
-
- // wait for shared mem packing to be done before starting computation
- __syncthreads();
-
- // compute 8x8 matrix product by outer product. This involves packing one column
- // of LHS and one row of RHS into registers (takes 16 registers).
-
-#define lcol(i) _lcol##i
- Scalar lcol(0);
- Scalar lcol(1);
- Scalar lcol(2);
- Scalar lcol(3);
- Scalar lcol(4);
- Scalar lcol(5);
- Scalar lcol(6);
- Scalar lcol(7);
-
-#define rrow(j) _rrow##j
- Scalar rrow(0);
- Scalar rrow(1);
- Scalar rrow(2);
- Scalar rrow(3);
- Scalar rrow(4);
- Scalar rrow(5);
- Scalar rrow(6);
- Scalar rrow(7);
-
- // Now x corresponds to k, y to m, and z to n
- const Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y];
- const Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z];
-
-#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))]
-#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))]
-
-#define loadData(i, j) \
- lcol(0) = lhs_element(0, j); \
- rrow(0) = rhs_element(i, 0); \
- lcol(1) = lhs_element(1, j); \
- rrow(1) = rhs_element(i, 1); \
- lcol(2) = lhs_element(2, j); \
- rrow(2) = rhs_element(i, 2); \
- lcol(3) = lhs_element(3, j); \
- rrow(3) = rhs_element(i, 3); \
- lcol(4) = lhs_element(4, j); \
- rrow(4) = rhs_element(i, 4); \
- lcol(5) = lhs_element(5, j); \
- rrow(5) = rhs_element(i, 5); \
- lcol(6) = lhs_element(6, j); \
- rrow(6) = rhs_element(i, 6); \
- lcol(7) = lhs_element(7, j); \
- rrow(7) = rhs_element(i, 7); \
-
-#define computeCol(j) \
- res(0, j) += lcol(0) * rrow(j); \
- res(1, j) += lcol(1) * rrow(j); \
- res(2, j) += lcol(2) * rrow(j); \
- res(3, j) += lcol(3) * rrow(j); \
- res(4, j) += lcol(4) * rrow(j); \
- res(5, j) += lcol(5) * rrow(j); \
- res(6, j) += lcol(6) * rrow(j); \
- res(7, j) += lcol(7) * rrow(j); \
-
-#define computePass(i) \
- loadData(i, i); \
- \
- computeCol(0); \
- computeCol(1); \
- computeCol(2); \
- computeCol(3); \
- computeCol(4); \
- computeCol(5); \
- computeCol(6); \
- computeCol(7); \
-
- computePass(0);
- computePass(1);
- computePass(2);
- computePass(3);
- computePass(4);
- computePass(5);
- computePass(6);
- computePass(7);
-
-#undef lcol
-#undef rrow
-#undef lhs_element
-#undef rhs_element
-#undef loadData
-#undef computeCol
-#undef computePass
- } // end loop over k
-
- // we've now iterated over all of the large (ie width 64) k blocks and
- // accumulated results in registers. At this point thread (x, y, z) contains
- // the sum across all big k blocks of the product of little k block of index (x, y)
- // with block of index (y, z). To compute the final output, we need to reduce
- // the 8 threads over y by summation.
-#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask)
-
-#define reduceRow(i, mask) \
- shuffleInc(i, 0, mask); \
- shuffleInc(i, 1, mask); \
- shuffleInc(i, 2, mask); \
- shuffleInc(i, 3, mask); \
- shuffleInc(i, 4, mask); \
- shuffleInc(i, 5, mask); \
- shuffleInc(i, 6, mask); \
- shuffleInc(i, 7, mask); \
-
-#define reduceMatrix(mask) \
- reduceRow(0, mask); \
- reduceRow(1, mask); \
- reduceRow(2, mask); \
- reduceRow(3, mask); \
- reduceRow(4, mask); \
- reduceRow(5, mask); \
- reduceRow(6, mask); \
- reduceRow(7, mask); \
-
- // actually perform the reduction, now each thread of index (_, y, z)
- // contains the correct values in its registers that belong in the output
- // block
- reduceMatrix(1);
- reduceMatrix(2);
- reduceMatrix(4);
-
-#undef shuffleInc
-#undef reduceRow
-#undef reduceMatrix
-
- // now we need to copy the 64 values into main memory. We can't split work
- // among threads because all variables are in registers. There's 2 ways
- // to do this:
- // (1) have 1 thread do 64 writes from registers into global memory
- // (2) have 1 thread do 64 writes into shared memory, and then 8 threads
- // each do 8 writes into global memory. We can just overwrite the shared
- // memory from the problem we just solved.
- // (2) is slightly faster than (1) due to less branching and more ILP
-
- // TODO: won't yield much gain, but could just use currently unused shared mem
- // and then we won't have to sync
- // wait for shared mem to be out of use
- __syncthreads();
-
-#define writeResultShmem(i, j) \
- lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \
-
-#define writeRow(i) \
- writeResultShmem(i, 0); \
- writeResultShmem(i, 1); \
- writeResultShmem(i, 2); \
- writeResultShmem(i, 3); \
- writeResultShmem(i, 4); \
- writeResultShmem(i, 5); \
- writeResultShmem(i, 6); \
- writeResultShmem(i, 7); \
-
- if (threadIdx.x == 0) {
- writeRow(0);
- writeRow(1);
- writeRow(2);
- writeRow(3);
- writeRow(4);
- writeRow(5);
- writeRow(6);
- writeRow(7);
- }
-#undef writeResultShmem
-#undef writeRow
-
- const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
- const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
-
- if (threadIdx.x < max_i_write) {
- if (max_j_write == 8) {
- // TODO: can i trade bank conflicts for coalesced writes?
- Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0];
- Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1];
- Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2];
- Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3];
- Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4];
- Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5];
- Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6];
- Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7];
-
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6;
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7;
- } else {
-#pragma unroll 7
- for (int j = 0; j < max_j_write; j++) {
- Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j];
- output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val;
- }
- }
- }
-#undef res
-}
-
-
-template<typename Scalar, typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(512)
-EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output,
- const Index m_size, const Index n_size, const Index k_size) {
- __shared__ Scalar lhs_shmem[72 * 64];
- __shared__ Scalar rhs_shmem[72 * 64];
-
- const Index m_block_idx = blockIdx.x;
- const Index n_block_idx = blockIdx.y;
-
- const Index base_m = 64 * m_block_idx;
- const Index base_n = 64 * n_block_idx;
-
- if (base_m + 63 < m_size && base_n + 63 < n_size) {
- EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size);
- } else {
- EigenContractionKernelInternal<Scalar, Index, LhsMapper, RhsMapper, OutputMapper, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size);
- }
-}
-
-
-template<typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
- bool CHECK_RHS_BOUNDARY>
-__device__ EIGEN_STRONG_INLINE void
-EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output, float2 lhs_shmem2[][16],
- float2 rhs_shmem2[][8], const Index m_size,
- const Index n_size, const Index k_size,
- const Index base_m, const Index base_n) {
- typedef float Scalar;
-
- // prefetch registers
- float4 lhs_pf0, rhs_pf0;
-
- float4 results[4];
- for (int i=0; i < 4; i++) {
- results[i].x = results[i].y = results[i].z = results[i].w = 0;
- }
-
-
-#define prefetch_lhs(reg, row, col) \
- if (!CHECK_LHS_BOUNDARY) { \
- if (col < k_size) { \
- reg =lhs.loadPacket<Unaligned>(row, col); \
- } \
- } else { \
- if (col < k_size) { \
- if (row + 3 < m_size) { \
- reg =lhs.loadPacket<Unaligned>(row, col); \
- } else if (row + 2 < m_size) { \
- reg.x =lhs(row + 0, col); \
- reg.y =lhs(row + 1, col); \
- reg.z =lhs(row + 2, col); \
- } else if (row + 1 < m_size) { \
- reg.x =lhs(row + 0, col); \
- reg.y =lhs(row + 1, col); \
- } else if (row < m_size) { \
- reg.x =lhs(row + 0, col); \
- } \
- } \
- } \
-
-
- Index lhs_vert = base_m+threadIdx.x*4;
-
- for (Index k = 0; k < k_size; k += 16) {
- lhs_pf0 = internal::pset1<float4>(0);
- rhs_pf0 = internal::pset1<float4>(0);
-
- Index lhs_horiz = threadIdx.y+k;
- prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz)
-
- Index rhs_vert = k+(threadIdx.x%4)*4;
- Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n;
-
- if (!CHECK_RHS_BOUNDARY) {
- if ((rhs_vert + 3) < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
- } else if (rhs_vert + 2 < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
- } else if (rhs_vert + 1 < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- } else if (rhs_vert < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- }
- } else {
- if (rhs_horiz0 < n_size) {
- if ((rhs_vert + 3) < k_size) {
- rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
- } else if ((rhs_vert + 2) < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
- } else if ((rhs_vert + 1) < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- } else if (rhs_vert < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- }
- }
- }
- float x1, x2 ;
- // the following can be a bitwise operation..... some day.
- if((threadIdx.x%8) < 4) {
- x1 = rhs_pf0.y;
- x2 = rhs_pf0.w;
- } else {
- x1 = rhs_pf0.x;
- x2 = rhs_pf0.z;
- }
- x1 = __shfl_xor(x1, 4);
- x2 = __shfl_xor(x2, 4);
- if((threadIdx.x%8) < 4) {
- rhs_pf0.y = x1;
- rhs_pf0.w = x2;
- } else {
- rhs_pf0.x = x1;
- rhs_pf0.z = x2;
- }
-
- // We have 64 features.
- // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1.
- // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3.
- // ...
- // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63
- // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1
- // ...
- rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y);
- rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w);
-
- // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61)
- // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61)
- // ...
- // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61)
- // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63)
- // ...
-
- lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y);
- lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w);
-
-
-#define add_vals(fl1, fl2, fr1, fr2)\
- results[0].x += fl1.x * fr1.x;\
- results[0].y += fl1.y * fr1.x;\
- results[0].z += fl2.x * fr1.x;\
- results[0].w += fl2.y * fr1.x;\
-\
- results[1].x += fl1.x * fr1.y;\
- results[1].y += fl1.y * fr1.y;\
- results[1].z += fl2.x * fr1.y;\
- results[1].w += fl2.y * fr1.y;\
-\
- results[2].x += fl1.x * fr2.x;\
- results[2].y += fl1.y * fr2.x;\
- results[2].z += fl2.x * fr2.x;\
- results[2].w += fl2.y * fr2.x;\
-\
- results[3].x += fl1.x * fr2.y;\
- results[3].y += fl1.y * fr2.y;\
- results[3].z += fl2.x * fr2.y;\
- results[3].w += fl2.y * fr2.y;\
-
- __syncthreads();
-
- // Do the multiplies.
- #pragma unroll
- for (int koff = 0; koff < 16; koff ++) {
- // 32 x threads.
- float2 fl1 = lhs_shmem2[koff][threadIdx.x];
- float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x];
-
- int start_feature = threadIdx.y * 4;
- float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4];
- float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4];
-
- add_vals(fl1, fl2, fr1, fr2)
- }
- __syncthreads();
- }
-
-#undef prefetch_lhs
-#undef add_vals
-
- Index horiz_base = threadIdx.y*4+base_n;
- if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) {
- for (int i = 0; i < 4; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- } else if (!CHECK_RHS_BOUNDARY) {
- // CHECK LHS
- if (lhs_vert + 3 < m_size) {
- for (int i = 0; i < 4; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- } else if (lhs_vert + 2 < m_size) {
- for (int i = 0; i < 4; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- }
- } else if (lhs_vert + 1 < m_size) {
- for (int i = 0; i < 4; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- }
- } else if (lhs_vert < m_size) {
- for (int i = 0; i < 4; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- }
- }
- } else if (!CHECK_LHS_BOUNDARY) {
- // CHECK RHS
- /*
- int ncols_rem = fminf(n_size- horiz_base, 4);
- for (int i = 0; i < ncols_rem; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }*/
- for (int i = 0; i < 4; i++) {
- if (horiz_base+i < n_size) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- }
- } else {
- // CHECK both boundaries.
- for (int i = 0; i < 4; i++) {
- if (horiz_base+i < n_size) {
- if (lhs_vert < m_size)
- output(lhs_vert, horiz_base + i) = results[i].x;
- if (lhs_vert + 1 < m_size)
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- if (lhs_vert + 2 < m_size)
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- if (lhs_vert + 3 < m_size)
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- }
- }
-}
-
-
-template<typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper, bool CHECK_LHS_BOUNDARY,
- bool CHECK_RHS_BOUNDARY>
-__device__ EIGEN_STRONG_INLINE void
-EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output, float2 lhs_shmem2[][32],
- float2 rhs_shmem2[][8], const Index m_size,
- const Index n_size, const Index k_size,
- const Index base_m, const Index base_n) {
- typedef float Scalar;
-
- // prefetch registers
- float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3;
- float4 rhs_pf0, rhs_pf1;
-
- float4 results[8];
- for (int i=0; i < 8; i++) {
- results[i].x = results[i].y = results[i].z = results[i].w = 0;
- }
-
-
- Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32;
- for (Index k = 0; k < k_size; k += 32) {
- lhs_pf0 = internal::pset1<float4>(0);
- lhs_pf1 = internal::pset1<float4>(0);
- lhs_pf2 = internal::pset1<float4>(0);
- lhs_pf3 = internal::pset1<float4>(0);
-
- rhs_pf0 = internal::pset1<float4>(0);
- rhs_pf1 = internal::pset1<float4>(0);
-
- if (!CHECK_LHS_BOUNDARY) {
- if ((threadIdx.y/4+k+24) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
- lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
- } else if ((threadIdx.y/4+k+16) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
- } else if ((threadIdx.y/4+k+8) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- } else if ((threadIdx.y/4+k) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- }
- } else {
- // just CHECK_LHS_BOUNDARY
- if (lhs_vert + 3 < m_size) {
- if ((threadIdx.y/4+k+24) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
- lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
- } else if ((threadIdx.y/4+k+16) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
- } else if ((threadIdx.y/4+k+8) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
- } else if ((threadIdx.y/4+k) < k_size) {
- lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
- }
- } else if (lhs_vert + 2 < m_size) {
- if ((threadIdx.y/4+k+24) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
- lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16));
- lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
- lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24));
- lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24));
- } else if ((threadIdx.y/4+k+16) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
- lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16));
- } else if ((threadIdx.y/4+k+8) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8));
- } else if ((threadIdx.y/4+k) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k));
- }
- } else if (lhs_vert + 1 < m_size) {
- if ((threadIdx.y/4+k+24) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
- lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
- lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24));
- } else if ((threadIdx.y/4+k+16) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16));
- } else if ((threadIdx.y/4+k+8) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8));
- } else if ((threadIdx.y/4+k) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k));
- }
- } else if (lhs_vert < m_size) {
- if ((threadIdx.y/4+k+24) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24));
- } else if ((threadIdx.y/4+k+16) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16));
- } else if ((threadIdx.y/4+k+8) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8));
- } else if ((threadIdx.y/4+k) < k_size) {
- lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k));
- }
- }
- }
- __syncthreads();
- Index rhs_vert = k+threadIdx.x*4;
- Index rhs_horiz0 = threadIdx.y*2+base_n;
- Index rhs_horiz1 = threadIdx.y*2+1+base_n;
- if (!CHECK_RHS_BOUNDARY) {
- if ((rhs_vert + 3) < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
- rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
- } else if (rhs_vert + 2 < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
- rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1);
- } else if (rhs_vert + 1 < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
- } else if (rhs_vert < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- }
- } else {
- if (rhs_horiz1 < n_size) {
- if ((rhs_vert + 3) < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
- rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
- } else if (rhs_vert + 2 < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
- rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1);
- } else if (k+threadIdx.x*4 + 1 < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1);
- } else if (k+threadIdx.x*4 < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf1.x = rhs(rhs_vert, rhs_horiz1);
- }
- } else if (rhs_horiz0 < n_size) {
- if ((rhs_vert + 3) < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
- } else if ((rhs_vert + 2) < k_size) {
- // just CHECK_RHS_BOUNDARY
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0);
- } else if ((rhs_vert + 1) < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
- } else if (rhs_vert < k_size) {
- rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
- }
- }
- }
- __syncthreads();
- // Loaded. Do computation
- // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1.
- // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3.
- // ..
- // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63
- rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x);
- // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1.
- // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3.
- // ..
- rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y);
- // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1.
- // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3.
- rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z);
- // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1.
- // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3.
- rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w);
-
- // LHS.
- // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125)
- // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125)
- // ...
- // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127)
- // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127)
-
-
-#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\
- results[0].x += a_feat1.x * f1.x;\
- results[1].x += a_feat1.x * f1.y;\
- results[2].x += a_feat1.x * f2.x;\
- results[3].x += a_feat1.x * f2.y;\
- results[4].x += a_feat1.x * f3.x;\
- results[5].x += a_feat1.x * f3.y;\
- results[6].x += a_feat1.x * f4.x;\
- results[7].x += a_feat1.x * f4.y;\
-\
- results[0].y += a_feat1.y * f1.x;\
- results[1].y += a_feat1.y * f1.y;\
- results[2].y += a_feat1.y * f2.x;\
- results[3].y += a_feat1.y * f2.y;\
- results[4].y += a_feat1.y * f3.x;\
- results[5].y += a_feat1.y * f3.y;\
- results[6].y += a_feat1.y * f4.x;\
- results[7].y += a_feat1.y * f4.y;\
-\
- results[0].z += a_feat2.x * f1.x;\
- results[1].z += a_feat2.x * f1.y;\
- results[2].z += a_feat2.x * f2.x;\
- results[3].z += a_feat2.x * f2.y;\
- results[4].z += a_feat2.x * f3.x;\
- results[5].z += a_feat2.x * f3.y;\
- results[6].z += a_feat2.x * f4.x;\
- results[7].z += a_feat2.x * f4.y;\
-\
- results[0].w += a_feat2.y * f1.x;\
- results[1].w += a_feat2.y * f1.y;\
- results[2].w += a_feat2.y * f2.x;\
- results[3].w += a_feat2.y * f2.y;\
- results[4].w += a_feat2.y * f3.x;\
- results[5].w += a_feat2.y * f3.y;\
- results[6].w += a_feat2.y * f4.x;\
- results[7].w += a_feat2.y * f4.y;\
-
- lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y);
- lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y);
- lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y);
- lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y);
-
- lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w);
- lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w);
- lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w);
- lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w);
-
- __syncthreads();
-
- // Do the multiplies.
- #pragma unroll
- for (int koff = 0; koff < 32; koff ++) {
- float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8];
- float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8];
-
- // first feature is at (threadIdx.y/4) * 8 last is at start + 8.
- int start_feature = (threadIdx.y / 4) * 8;
-
- float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4];
- float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4];
- float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4];
- float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4];
-
- add_vals(a3, a4, br1, br2, br3, br4)
- }
- __syncthreads();
- } // end loop over k
-
-
- __syncthreads();
- Index horiz_base = (threadIdx.y/4)*8+base_n;
- if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) {
- for (int i = 0; i < 8; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- } else if (!CHECK_RHS_BOUNDARY) {
- if (lhs_vert + 3 < m_size) {
- for (int i = 0; i < 8; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- } else if (lhs_vert + 2 < m_size) {
- for (int i = 0; i < 8; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- }
- } else if (lhs_vert + 1 < m_size) {
- for (int i = 0; i < 8; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- }
- } else if (lhs_vert < m_size) {
- for (int i = 0; i < 8; i++) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- }
- }
- } else if (!CHECK_LHS_BOUNDARY) {
- // CHECK BOUNDARY_B
- for (int i = 0; i < 8; i++) {
- if (horiz_base + i < n_size) {
- output(lhs_vert, horiz_base + i) = results[i].x;
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- }
- } else {
- // CHECK both boundaries.
- for (int i = 0; i < 8; i++) {
- if (horiz_base + i < n_size) {
- if (lhs_vert < m_size)
- output(lhs_vert, horiz_base + i) = results[i].x;
- if (lhs_vert + 1 < m_size)
- output(lhs_vert + 1, horiz_base + i) = results[i].y;
- if (lhs_vert + 2 < m_size)
- output(lhs_vert + 2, horiz_base + i) = results[i].z;
- if (lhs_vert + 3 < m_size)
- output(lhs_vert + 3, horiz_base + i) = results[i].w;
- }
- }
- }
-}
-
-
-template<typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(256)
-EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output,
- const Index m_size, const Index n_size, const Index k_size) {
- __shared__ float2 lhs_shmem[64*32];
- __shared__ float2 rhs_shmem[128*8];
-
- typedef float2 LHS_MEM[64][32];
- typedef float2 RHS_MEM[128][8];
-
- typedef float2 LHS_MEM16x16[32][16];
- typedef float2 RHS_MEM16x16[64][8];
-
- const Index m_block_idx = blockIdx.x;
- const Index n_block_idx = blockIdx.y;
-
- const Index base_m = 128 * m_block_idx;
- const Index base_n = 64 * n_block_idx;
-
- bool check_rhs = (base_n + 63) >= n_size;
- bool check_lhs128 = (base_m + 127) >= m_size;
-
- if (!check_rhs) {
- if (!check_lhs128) {
- // >= 128 rows left
- EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(
- lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
- } else {
- EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(
- lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
- }
- } else {
- if (!check_lhs128) {
- // >= 128 rows left
- EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(
- lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
- } else {
- EigenFloatContractionKernelInternal<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(
- lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n);
- }
- }
-}
-
-template<typename Index, typename LhsMapper,
- typename RhsMapper, typename OutputMapper>
-__global__ void
-__launch_bounds__(256)
-EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
- const OutputMapper output,
- const Index m_size, const Index n_size, const Index k_size) {
- __shared__ float2 lhs_shmem[32][16];
- __shared__ float2 rhs_shmem[64][8];
-
- const Index m_block_idx = blockIdx.x;
- const Index n_block_idx = blockIdx.y;
-
- const Index base_m = 64 * m_block_idx;
- const Index base_n = 64 * n_block_idx;
-
- if (base_m + 63 < m_size) {
- if (base_n + 63 < n_size) {
- EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
- } else {
- EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, false, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
- }
- } else {
- if (base_n + 63 < n_size) {
- EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, false>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
- } else {
- EigenFloatContractionKernelInternal16x16<Index, LhsMapper, RhsMapper, OutputMapper, true, true>(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n);
- }
- }
-}
-
-
-template<typename Indices, typename LeftArgType, typename RightArgType>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> :
- public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > {
-
- typedef GpuDevice Device;
-
- typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
- typedef TensorContractionEvaluatorBase<Self> Base;
-
- typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef typename XprType::Index Index;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
-
- enum {
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- };
-
- // Most of the code is assuming that both input tensors are ColMajor. If the
- // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
- // If we want to compute A * B = C, where A is LHS and B is RHS, the code
- // will pretend B is LHS and A is RHS.
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
- static const int LDims =
- internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
- static const int RDims =
- internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
- static const int ContractDims = internal::array_size<Indices>::value;
-
- typedef array<Index, LDims> left_dim_mapper_t;
- typedef array<Index, RDims> right_dim_mapper_t;
-
- typedef array<Index, ContractDims> contract_t;
- typedef array<Index, LDims - ContractDims> left_nocontract_t;
- typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
- static const int NumDims = LDims + RDims - 2 * ContractDims;
-
- typedef DSizes<Index, NumDims> Dimensions;
-
- // typedefs needed in evalTo
- typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
- typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
-
- typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
- typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
- typedef typename LeftEvaluator::Dimensions LeftDimensions;
- typedef typename RightEvaluator::Dimensions RightDimensions;
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
- Base(op, device) {}
-
- // We need to redefine this method to make nvcc happy
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
- this->m_leftImpl.evalSubExprsIfNeeded(NULL);
- this->m_rightImpl.evalSubExprsIfNeeded(NULL);
- if (data) {
- evalTo(data);
- return false;
- } else {
- this->m_result = static_cast<Scalar *>(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar)));
- evalTo(this->m_result);
- return true;
- }
- }
-
- void evalTo(Scalar* buffer) const {
- if (this->m_lhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- evalTyped<true, true, true, Unaligned>(buffer);
- }
- else {
- evalTyped<true, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- evalTyped<true, false, true, Unaligned>(buffer);
- }
- else {
- evalTyped<true, false, false, Unaligned>(buffer);
- }
- }
- }
- else {
- if (this->m_rhs_inner_dim_contiguous) {
- if (this->m_rhs_inner_dim_reordered) {
- evalTyped<false, true, true, Unaligned>(buffer);
- }
- else {
- evalTyped<false, true, false, Unaligned>(buffer);
- }
- }
- else {
- if (this->m_rhs_inner_dim_reordered) {
- evalTyped<false, false, true, Unaligned>(buffer);
- }
- else {
- evalTyped<false, false, false, Unaligned>(buffer);
- }
- }
- }
- }
-
- template <typename LhsScalar, typename RhsScalar, typename Index, typename LhsMapper, typename RhsMapper, typename OutputMapper> struct LaunchKernels {
- static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) {
- const Index m_blocks = (m + 63) / 64;
- const Index n_blocks = (n + 63) / 64;
- const dim3 num_blocks(m_blocks, n_blocks, 1);
- const dim3 block_size(8, 8, 8);
- LAUNCH_CUDA_KERNEL((EigenContractionKernel<Scalar, Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k);
- }
- };
-
- template <typename Index, typename LhsMapper, typename RhsMapper, typename OutputMapper> struct LaunchKernels<float, float, Index, LhsMapper, RhsMapper, OutputMapper> {
- static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) {
- if (m < 768 || n < 768) {
- const Index m_blocks = (m + 63) / 64;
- const Index n_blocks = (n + 63) / 64;
- const dim3 num_blocks(m_blocks, n_blocks, 1);
- const dim3 block_size(16, 16, 1);
- LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k);
- } else {
- const Index m_blocks = (m + 127) / 128;
- const Index n_blocks = (n + 63) / 64;
- const dim3 num_blocks(m_blocks, n_blocks, 1);
- const dim3 block_size(8, 32, 1);
- LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel<Index, LhsMapper, RhsMapper, OutputMapper>), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k);
- }
- }
- };
-
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- void evalTyped(Scalar* buffer) const {
- // columns in left side, rows in right side
- const Index k = this->m_k_size;
- EIGEN_UNUSED_VARIABLE(k)
-
- // rows in left side
- const Index m = this->m_i_size;
-
- // columns in right side
- const Index n = this->m_j_size;
-
- // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)
- this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
- typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
- LeftEvaluator, left_nocontract_t,
- contract_t, 4,
- lhs_inner_dim_contiguous,
- false, Unaligned> LhsMapper;
-
- typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
- RightEvaluator, right_nocontract_t,
- contract_t, 4,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
- typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
-
- // initialize data mappers
- LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
- this->m_left_contracting_strides, this->m_k_strides);
-
- RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
- this->m_right_contracting_strides, this->m_k_strides);
-
- OutputMapper output(buffer, m);
-
- setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte);
- LaunchKernels<LhsScalar, RhsScalar, Index, LhsMapper, RhsMapper, OutputMapper>::Run(lhs, rhs, output, m, n, k, this->m_device);
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_USE_GPU and __CUDACC__
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
deleted file mode 100644
index 9b2cb3f..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
+++ /dev/null
@@ -1,467 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
-
-namespace Eigen {
-
-namespace internal {
-
-enum {
- Rhs = 0,
- Lhs = 1
-};
-
-/*
- * Implementation of the Eigen blas_data_mapper class for tensors.
- */
-
-template <typename Tensor, bool HasRawAccess> struct CoeffLoader {
- enum {
- DirectOffsets = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) {
- eigen_assert(false && "unsupported");
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); }
-
- template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
- {
- return m_tensor.template packet<LoadMode>(index);
- }
-
-
- private:
- const Tensor m_tensor;
-};
-
-template <typename Tensor> struct CoeffLoader<Tensor, true> {
- enum {
- DirectOffsets = true
- };
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {}
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
- m_data += offset;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); }
-
- template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- typename Tensor::PacketReturnType packet(typename Tensor::Index index) const
- {
- return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index);
- }
- private:
- typedef typename Tensor::Scalar Scalar;
- const Scalar* m_data;
-};
-
-template<typename Scalar, typename Index, int side,
- typename Tensor,
- typename nocontract_t, typename contract_t,
- int packet_size, bool inner_dim_contiguous, int Alignment>
-class SimpleTensorContractionMapper {
- public:
- EIGEN_DEVICE_FUNC
- SimpleTensorContractionMapper(const Tensor& tensor,
- const nocontract_t& nocontract_strides,
- const nocontract_t& ij_strides,
- const contract_t& contract_strides,
- const contract_t& k_strides) :
- m_tensor(tensor),
- m_nocontract_strides(nocontract_strides),
- m_ij_strides(ij_strides),
- m_contract_strides(contract_strides),
- m_k_strides(k_strides) { }
-
- enum {
- DirectOffsets = CoeffLoader<Tensor, Tensor::RawAccess>::DirectOffsets
- };
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) {
- m_tensor.offsetBuffer(offset);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar operator()(Index row) const {
- // column major assumption
- return operator()(row, 0);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const {
- return m_tensor.coeff(computeIndex(row, col));
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const {
- const bool left = (side == Lhs);
- Index nocontract_val = left ? row : col;
- Index linidx = 0;
- for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
- const Index idx = nocontract_val / m_ij_strides[i];
- linidx += idx * m_nocontract_strides[i];
- nocontract_val -= idx * m_ij_strides[i];
- }
- if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
- if (side == Lhs && inner_dim_contiguous) {
- eigen_assert(m_nocontract_strides[0] == 1);
- linidx += nocontract_val;
- } else {
- linidx += nocontract_val * m_nocontract_strides[0];
- }
- }
-
- Index contract_val = left ? col : row;
- if(array_size<contract_t>::value > 0) {
- for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
- const Index idx = contract_val / m_k_strides[i];
- linidx += idx * m_contract_strides[i];
- contract_val -= idx * m_k_strides[i];
- }
-
- if (side == Rhs && inner_dim_contiguous) {
- eigen_assert(m_contract_strides[0] == 1);
- linidx += contract_val;
- } else {
- linidx += contract_val * m_contract_strides[0];
- }
- }
-
- return linidx;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE IndexPair<Index> computeIndexPair(Index row, Index col, const Index distance) const {
- const bool left = (side == Lhs);
- Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
- Index linidx[2] = {0, 0};
- if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
- for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
- const Index idx0 = nocontract_val[0] / m_ij_strides[i];
- const Index idx1 = nocontract_val[1] / m_ij_strides[i];
- linidx[0] += idx0 * m_nocontract_strides[i];
- linidx[1] += idx1 * m_nocontract_strides[i];
- nocontract_val[0] -= idx0 * m_ij_strides[i];
- nocontract_val[1] -= idx1 * m_ij_strides[i];
- }
- if (side == Lhs && inner_dim_contiguous) {
- eigen_assert(m_nocontract_strides[0] == 1);
- linidx[0] += nocontract_val[0];
- linidx[1] += nocontract_val[1];
- } else {
- linidx[0] += nocontract_val[0] * m_nocontract_strides[0];
- linidx[1] += nocontract_val[1] * m_nocontract_strides[0];
- }
- }
-
- Index contract_val[2] = {left ? col : row, left ? col : row + distance};
- if (array_size<contract_t>::value> 0) {
- for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
- const Index idx0 = contract_val[0] / m_k_strides[i];
- const Index idx1 = contract_val[1] / m_k_strides[i];
- linidx[0] += idx0 * m_contract_strides[i];
- linidx[1] += idx1 * m_contract_strides[i];
- contract_val[0] -= idx0 * m_k_strides[i];
- contract_val[1] -= idx1 * m_k_strides[i];
- }
-
- if (side == Rhs && inner_dim_contiguous) {
- eigen_assert(m_contract_strides[0] == 1);
- linidx[0] += contract_val[0];
- linidx[1] += contract_val[1];
- } else {
- linidx[0] += contract_val[0] * m_contract_strides[0];
- linidx[1] += contract_val[1] * m_contract_strides[0];
- }
- }
- return IndexPair<Index>(linidx[0], linidx[1]);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const {
- // Only claim alignment when we can compute the actual stride (ie when we're
- // dealing with the lhs with inner_dim_contiguous. This is because the
- // matrix-vector product relies on the stride when dealing with aligned inputs.
- return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size;
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const {
- return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1;
- }
-
- protected:
- CoeffLoader<Tensor, Tensor::RawAccess> m_tensor;
- const nocontract_t m_nocontract_strides;
- const nocontract_t m_ij_strides;
- const contract_t m_contract_strides;
- const contract_t m_k_strides;
-};
-
-
-template<typename Scalar, typename Index, int side,
- typename Tensor,
- typename nocontract_t, typename contract_t,
- int packet_size, bool inner_dim_contiguous,
- bool inner_dim_reordered, int Alignment>
-class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment>
-{
- public:
- typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, Alignment> ParentMapper;
-
- EIGEN_DEVICE_FUNC
- BaseTensorContractionMapper(const Tensor& tensor,
- const nocontract_t& nocontract_strides,
- const nocontract_t& ij_strides,
- const contract_t& contract_strides,
- const contract_t& k_strides) :
- ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
- typedef typename Tensor::PacketReturnType Packet;
- typedef typename unpacket_traits<Packet>::half HalfPacket;
-
- template <int AlignmentType>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
- // whole method makes column major assumption
-
- // don't need to add offsets for now (because operator handles that)
- // current code assumes packet size must be a multiple of 2
- EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) {
- const Index index = this->computeIndex(i, j);
- eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1);
- return this->m_tensor.template packet<AlignmentType>(index);
- }
-
- const IndexPair<Index> indexPair = this->computeIndexPair(i, j, packet_size - 1);
- const Index first = indexPair.first;
- const Index last = indexPair.second;
-
- // We can always do optimized packet reads from left hand side right now, because
- // the vertical matrix dimension on the left hand side is never contracting.
- // On the right hand side we need to check if the contracting dimensions may have
- // been shuffled first.
- if (Tensor::PacketAccess &&
- (side == Lhs || internal::array_size<contract_t>::value <= 1 || !inner_dim_reordered) &&
- (last - first) == (packet_size - 1)) {
-
- return this->m_tensor.template packet<AlignmentType>(first);
- }
-
- EIGEN_ALIGN_MAX Scalar data[packet_size];
-
- data[0] = this->m_tensor.coeff(first);
- for (Index k = 1; k < packet_size - 1; k += 2) {
- const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
- data[k] = this->m_tensor.coeff(internal_pair.first);
- data[k + 1] = this->m_tensor.coeff(internal_pair.second);
- }
- data[packet_size - 1] = this->m_tensor.coeff(last);
-
- return pload<Packet>(data);
- }
-
- template <int AlignmentType>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
- // whole method makes column major assumption
-
- // don't need to add offsets for now (because operator handles that)
- const Index half_packet_size = unpacket_traits<HalfPacket>::size;
- if (half_packet_size == packet_size) {
- return loadPacket<AlignmentType>(i, j);
- }
- EIGEN_ALIGN_MAX Scalar data[half_packet_size];
- for (Index k = 0; k < half_packet_size; k++) {
- data[k] = operator()(i + k, j);
- }
- return pload<HalfPacket>(data);
- }
-};
-
-
-// Specialization of BaseTensorContractionMapper for packet_size == 1: a
-// "packet" holds a single coefficient, so packet loads reduce to one scalar
-// read from the underlying tensor.
-template<typename Scalar, typename Index, int side,
- typename Tensor,
- typename nocontract_t, typename contract_t,
- bool inner_dim_contiguous,
- bool inner_dim_reordered, int Alignment>
-class BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, Alignment> : public SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment>
-{
- public:
- typedef SimpleTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, 1, inner_dim_contiguous, Alignment> ParentMapper;
-
- // Forwards the tensor and all stride tables to the parent mapper unchanged.
- EIGEN_DEVICE_FUNC
- BaseTensorContractionMapper(const Tensor& tensor,
- const nocontract_t& nocontract_strides,
- const nocontract_t& ij_strides,
- const contract_t& contract_strides,
- const contract_t& k_strides) :
- ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
- typedef typename Tensor::PacketReturnType Packet;
-
- // Loads the single coefficient at (i, j) into a one-element packet. The
- // alignment template argument is accepted for interface compatibility only.
- template <int> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
- EIGEN_ALIGN_MAX Scalar data[1];
- data[0] = this->m_tensor.coeff(this->computeIndex(i, j));
- return pload<typename Tensor::PacketReturnType>(data);
- }
-
- // With one coefficient per packet a half packet is the same as a full one.
- // The alignment argument is forwarded explicitly: loadPacket's template
- // parameter cannot be deduced from (i, j), so a bare loadPacket(i, j) call
- // would fail to compile as soon as this member is instantiated.
- template <int AlignmentType> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const {
- return loadPacket<AlignmentType>(i, j);
- }
-};
-
-
-// View onto a rectangular sub-block of a contraction input. It wraps a copy of
-// the parent mapper together with a (vertical, horizontal) offset; every
-// accessor either adds that offset to the requested coordinates or, when
-// UseDirectOffsets is set, relies on the offset having been baked into the
-// parent mapper's buffer at construction time.
-template<typename Scalar, typename Index, int side,
- typename Tensor,
- typename nocontract_t, typename contract_t,
- int packet_size,
- bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionSubMapper {
- public:
- typedef typename Tensor::PacketReturnType Packet;
- typedef typename unpacket_traits<Packet>::half HalfPacket;
-
- typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
- typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
- typedef Self LinearMapper;
-
- enum {
- // We can use direct offsets iff the parent mapper supports them and we can compute the strides.
- // TODO: we should also enable direct offsets for the Rhs case.
- UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
- };
-
- // Copies base_mapper and records the block origin (vert_offset, horiz_offset).
- EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
- : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) {
- // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute
- // this offset every time we attempt to access a coefficient.
- if (UseDirectOffsets) {
- Index stride = m_base_mapper.stride();
- m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride);
- }
- }
-
- // Coefficient at row i of the first column of the sub-block.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
- if (UseDirectOffsets) {
- return m_base_mapper(i, 0);
- }
- return m_base_mapper(i + m_vert_offset, m_horiz_offset);
- }
- // Coefficient at (i, j) relative to the sub-block origin.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
- if (UseDirectOffsets) {
- return m_base_mapper(i, j);
- }
- return m_base_mapper(i + m_vert_offset, j + m_horiz_offset);
- }
-
- // Packet starting at row i of the first column of the sub-block.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
- if (UseDirectOffsets) {
- return m_base_mapper.template loadPacket<Alignment>(i, 0);
- }
- return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
- }
- // Packet starting at (i, j) relative to the sub-block origin.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
- if (UseDirectOffsets) {
- return m_base_mapper.template loadPacket<Alignment>(i, j);
- }
- return m_base_mapper.template loadPacket<Alignment>(i + m_vert_offset, j + m_horiz_offset);
- }
-
- // Half packet starting at row i of the first column of the sub-block.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
- if (UseDirectOffsets) {
- return m_base_mapper.template loadHalfPacket<Alignment>(i, 0);
- }
- return m_base_mapper.template loadHalfPacket<Alignment>(i + m_vert_offset, m_horiz_offset);
- }
-
- // Stores packet p at row i of the first column of the sub-block.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const {
- if (UseDirectOffsets) {
- m_base_mapper.storePacket(i, 0, p);
- // The offsets are already part of the base mapper's buffer. Return here,
- // like every other accessor does, instead of falling through and storing
- // a second time at the (doubly) offset location.
- return;
- }
- m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p);
- }
-
- // Returns a mapper whose origin is (i, j) relative to this sub-block.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
- if (UseDirectOffsets) {
- return LinearMapper(m_base_mapper, i, j);
- }
- return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset);
- }
-
- // Generic packet load used by the packing code. PacketT must be Packet; the
- // load is treated as aligned only if both the caller and this mapper say so.
- template <typename PacketT, int AlignmentType>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const {
- EIGEN_STATIC_ASSERT((internal::is_same<PacketT, Packet>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned;
- if (UseDirectOffsets) {
- return m_base_mapper.template loadPacket<ActualAlignment>(i, 0);
- }
- return m_base_mapper.template loadPacket<ActualAlignment>(i + m_vert_offset, m_horiz_offset);
- }
-
- // Alignment is never promised for tensor-backed data.
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const {
- return false;
- }
-
- private:
- ParentMapper m_base_mapper;
- const Index m_vert_offset;
- const Index m_horiz_offset;
-};
-
-
-// Top-level mapper handed to the contraction kernels. It inherits all
-// coefficient and packet access from BaseTensorContractionMapper and only adds
-// factories for sub-block views.
-template<typename Scalar_, typename Index, int side,
- typename Tensor,
- typename nocontract_t, typename contract_t,
- int packet_size,
- bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionInputMapper
- : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
-
- public:
- typedef Scalar_ Scalar;
- typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
- typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
- // Vector views are represented by the very same sub-mapper type.
- typedef SubMapper VectorMapper;
-
- // All arguments are passed straight through to the base mapper.
- EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor,
- const nocontract_t& nocontract_strides,
- const nocontract_t& ij_strides,
- const contract_t& contract_strides,
- const contract_t& k_strides)
- : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { }
-
- // View of this mapper whose origin is moved to (i, j).
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
- const Base& self = *this;
- return SubMapper(self, i, j);
- }
-
- // Same view as getSubMapper(), exposed under the VectorMapper name.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
- const Base& self = *this;
- return VectorMapper(self, i, j);
- }
-};
-
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
deleted file mode 100644
index c70dea0..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ /dev/null
@@ -1,1043 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
-
-// evaluator for thread pool device
-#ifdef EIGEN_USE_THREADS
-
-namespace Eigen {
-
-#ifdef EIGEN_USE_SIMPLE_THREAD_POOL
-namespace internal {
-
-template<typename LhsScalar, typename LhsMapper, typename Index>
-struct packLhsArg { // Plain argument bundle; only compiled when EIGEN_USE_SIMPLE_THREAD_POOL is defined.
- LhsScalar* blockA; // presumably the destination buffer for the packed lhs block - confirm at the use site
- const LhsMapper& lhs; // held by reference: the mapper must outlive this struct
- const Index m_start; // NOTE(review): looks like the first row of the block - confirm
- const Index k_start; // NOTE(review): looks like the first contraction index of the block - confirm
- const Index mc; // NOTE(review): presumably the block extent along m - confirm
- const Index kc; // NOTE(review): presumably the block extent along k - confirm
-};
-
-template<typename LhsScalar, typename RhsScalar, typename RhsMapper, typename OutputMapper, typename Index>
-struct packRhsAndKernelArg { // Plain argument bundle; only compiled when EIGEN_USE_SIMPLE_THREAD_POOL is defined.
- const MaxSizeVector<LhsScalar*>* blockAs; // non-owning pointer to a vector of lhs buffers (presumably already packed - confirm)
- RhsScalar* blockB; // presumably the destination buffer for the packed rhs block - confirm
- const RhsMapper& rhs; // held by reference: the mapper must outlive this struct
- OutputMapper& output; // held by reference and non-const, i.e. the consumer may write through it
- const Index m; // NOTE(review): m, k, n look like the full problem sizes - confirm
- const Index k;
- const Index n;
- const Index mc; // NOTE(review): mc, kc, nc look like the block sizes along m, k, n - confirm
- const Index kc;
- const Index nc;
- const Index num_threads;
- const Index num_blockAs;
- const Index max_m;
- const Index k_block_idx; // NOTE(review): *_block_idx presumably identify the block handled by this task - confirm
- const Index m_block_idx;
- const Index n_block_idx;
- const Index m_blocks;
- const Index n_blocks;
- MaxSizeVector<Notification*>* kernel_notifications; // non-owning; pointee is mutable
- const MaxSizeVector<Notification*>* lhs_notifications; // non-owning; pointee is read-only
- const bool need_to_pack;
-};
-
-} // end namespace internal
-#endif // EIGEN_USE_SIMPLE_THREAD_POOL
-
-template<typename Indices, typename LeftArgType, typename RightArgType>
-struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> :
- public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, ThreadPoolDevice> > {
-
- typedef ThreadPoolDevice Device;
-
- typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
- typedef TensorContractionEvaluatorBase<Self> Base;
-
- typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef typename XprType::Index Index;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- enum {
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- };
-
- // Most of the code is assuming that both input tensors are ColMajor. If the
- // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:
- // If we want to compute A * B = C, where A is LHS and B is RHS, the code
- // will pretend B is LHS and A is RHS.
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;
- typedef typename internal::conditional<
- static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
-
- static const int LDims =
- internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
- static const int RDims =
- internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
- static const int ContractDims = internal::array_size<Indices>::value;
-
- typedef array<Index, LDims> left_dim_mapper_t;
- typedef array<Index, RDims> right_dim_mapper_t;
-
- typedef array<Index, ContractDims> contract_t;
- typedef array<Index, LDims - ContractDims> left_nocontract_t;
- typedef array<Index, RDims - ContractDims> right_nocontract_t;
-
- static const int NumDims = LDims + RDims - 2 * ContractDims;
-
- typedef DSizes<Index, NumDims> Dimensions;
-
- // typedefs needed in evalTo
- typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;
- typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;
- typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
-
- typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
- typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-
- TensorEvaluator(const XprType& op, const Device& device) : // Adds no state of its own:
- Base(op, device) {} // the expression and the device are handed to the shared contraction base.
-
-#ifndef EIGEN_USE_SIMPLE_THREAD_POOL
- // Evaluates the contraction into `buffer` (m x n scalars) using the
- // thread pool. Picks blocking, sharding, task grain sizes and the number of
- // threads, falls back to the single-threaded evalGemv/evalGemm when only one
- // thread is worthwhile, and otherwise runs a parallel Context.
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
- bool rhs_inner_dim_reordered, int Alignment>
- void evalProduct(Scalar* buffer) const {
- typedef internal::TensorContractionInputMapper<
- LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t,
- contract_t, internal::packet_traits<LhsScalar>::size,
- lhs_inner_dim_contiguous, false, Unaligned>
- LhsMapper;
- typedef internal::TensorContractionInputMapper<
- RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t,
- contract_t, internal::packet_traits<RhsScalar>::size,
- rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned>
- RhsMapper;
- typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
- typedef internal::gemm_pack_lhs<LhsScalar, Index,
- typename LhsMapper::SubMapper, Traits::mr,
- Traits::LhsProgress, ColMajor>
- LhsPacker;
- typedef internal::gemm_pack_rhs<
- RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor>
- RhsPacker;
- typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
- Traits::mr, Traits::nr, false, false>
- GebpKernel;
-
- const Index m = this->m_i_size;
- const Index n = this->m_j_size;
- const Index k = this->m_k_size;
- // An empty output has nothing to compute and nothing to write.
- if (m == 0 || n == 0) return;
- // Contracting over an empty dimension yields an all-zero m x n result. The
- // buffer still has to be written: every other evaluation path zero-fills
- // the output before accumulating into it, so returning without touching it
- // would hand uninitialized memory to the caller.
- if (k == 0) {
- memset(buffer, 0, m * n * sizeof(Scalar));
- return;
- }
-
- // Compute a set of algorithm parameters:
- // - kernel block sizes (bm, bn, bk)
- // - task grain sizes (number of kernels executed per task: gm, gn)
- // - number of threads
- // - sharding by row/column
- // - parallel packing or first lhs then rhs
- // and some derived parameters:
- // - number of tasks (nm, nn, nk)
- // - number of kernels (nm0, nn0)
- // Unfortunately, all these parameters are tightly interdependent.
- // So in some cases we first compute approximate values, then compute other
- // values based on these approximations and then refine the approximations.
-
- // There are lots of heuristics here. There is some reasoning behind them,
- // but ultimately they are just tuned on contraction benchmarks for
- // different input configurations, thread counts and instruction sets.
- // So feel free to question any of them.
-
- // Compute whether we want to shard by row or by column.
- // This is a first approximation, it will be refined later. Since we don't
- // know number of threads yet we use 2, because what we are most
- // interested in at this point is whether it makes sense to use
- // parallelization at all or not.
- bool shard_by_col = shardByCol(m, n, 2);
-
- // First approximation of kernel blocking sizes.
- // Again, we don't know number of threads yet, so we use 2.
- Index bm, bn, bk;
- if (shard_by_col) {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
- internal::ShardByCol>
- blocking(k, m, n, 2);
- bm = blocking.mc();
- bn = blocking.nc();
- bk = blocking.kc();
- } else {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
- internal::ShardByRow>
- blocking(k, m, n, 2);
- bm = blocking.mc();
- bn = blocking.nc();
- bk = blocking.kc();
- }
-
- // Compute optimal number of threads.
- // Note: we use bk instead of k here because we are interested in amount of
- // _parallelizable_ computations, and computations are not parallelizable
- // across k dimension.
- const TensorOpCost cost =
- contractionCost(m, n, bm, bn, bk, shard_by_col, false);
- int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
- static_cast<double>(n) * m, cost, this->m_device.numThreads());
-
- // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
- // model is not tuned. Remove this when the cost model is tuned.
- if (n == 1) num_threads = 1;
-
- if (num_threads == 1) {
- // The single-threaded algorithm should be faster in this case.
- if (n == 1)
- this->template evalGemv<lhs_inner_dim_contiguous,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Alignment>(buffer);
- else
- this->template evalGemm<lhs_inner_dim_contiguous,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Alignment>(buffer);
- return;
- }
-
- // Now that we know number of threads, recalculate sharding and blocking.
- shard_by_col = shardByCol(m, n, num_threads);
- if (shard_by_col) {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
- internal::ShardByCol>
- blocking(k, m, n, num_threads);
- bm = blocking.mc();
- bn = blocking.nc();
- bk = blocking.kc();
- } else {
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
- internal::ShardByRow>
- blocking(k, m, n, num_threads);
- bm = blocking.mc();
- bn = blocking.nc();
- bk = blocking.kc();
- }
-
- // Number of kernels for each dimension.
- Index nm0 = divup(m, bm);
- Index nn0 = divup(n, bn);
- Index nk = divup(k, bk);
-
- // Calculate task grain size (number of kernels executed per task).
- // This task size coarsening serves two purposes:
- // 1. It reduces per-task overheads including synchronization overheads.
- // 2. It allows to use caches better (reuse the same packed rhs in several
- // consecutive kernels).
- Index gm = 1;
- Index gn = 1;
- // If we are sharding by column, then we prefer to reduce rows first.
- if (shard_by_col) {
- gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col);
- gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col);
- } else {
- gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col);
- gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col);
- }
- // Number of tasks in each dimension.
- Index nm = divup(nm0, gm);
- Index nn = divup(nn0, gn);
-
- // Last but not least, decide whether we want to issue both lhs and rhs
- // packing in parallel; or issue lhs packing first, and then issue rhs
- // packing when lhs packing completes (for !shard_by_col lhs and rhs are
- // swapped). Parallel packing allows more parallelism (for both packing and
- // kernels), while sequential packing provides better locality (once
- // a thread finishes rhs packing it proceeds to kernels with that rhs).
- // First, we are interested in parallel packing if there are few tasks.
- bool parallel_pack = num_threads >= nm * nn;
- // Also do parallel packing if all data fits into L2$.
- if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <=
- l2CacheSize() * num_threads)
- parallel_pack = true;
- // But don't do it if we will use each rhs only once. Locality seems to be
- // more important in this case.
- if ((shard_by_col ? nm : nn) == 1) parallel_pack = false;
-
- LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides,
- this->m_i_strides, this->m_left_contracting_strides,
- this->m_k_strides);
-
- RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides,
- this->m_j_strides, this->m_right_contracting_strides,
- this->m_k_strides);
-
- // The temporary Context owns all scratch memory; run() blocks until the
- // whole contraction has completed, after which it is destroyed.
- Context<LhsPacker, RhsPacker, GebpKernel, LhsMapper, RhsMapper,
- OutputMapper>(this->m_device, num_threads, lhs, rhs, buffer, m, n,
- k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0,
- shard_by_col, parallel_pack)
- .run();
- }
-
- // Context coordinates a single parallel gemm operation: it owns the packed lhs/rhs scratch memory and the dependency counters, and run() blocks until done_ is notified.
- template <typename LhsPacker, typename RhsPacker, typename GebpKernel,
- typename LhsMapper, typename RhsMapper, typename OutputMapper>
- class Context {
- public:
- Context(const Device& device, int num_threads, LhsMapper& lhs,
- RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
- Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
- Index gn, Index nm0, Index nn0, bool shard_by_col,
- bool parallel_pack)
- : device_(device),
- lhs_(lhs),
- rhs_(rhs),
- buffer_(buffer),
- output_(buffer, tm),
- num_threads_(num_threads),
- shard_by_col_(shard_by_col),
- parallel_pack_(parallel_pack),
- m_(tm),
- n_(tn),
- k_(tk),
- bm_(bm),
- bn_(bn),
- bk_(bk),
- nm_(nm),
- nn_(nn),
- nk_(nk),
- gm_(gm),
- gn_(gn),
- nm0_(nm0),
- nn0_(nn0)
- {
- for (Index x = 0; x < P; x++) {
- // Normal number of notifications for k slice switch is
- // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only
- // nm_ + nn_ notifications, because they will not receive notifications
- // from preceding kernels.
- state_switch_[x] =
- x == 0
- ? 1
- : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) +
- (x == P - 1 ? nm_ * nn_ : 0);
- state_packing_ready_[x] =
- parallel_pack_ ? 0 : (shard_by_col_ ? nm_ : nn_);
- state_kernel_[x] = new std::atomic<uint8_t>*[nm_];
- for (Index m = 0; m < nm_; m++) {
- state_kernel_[x][m] = new std::atomic<uint8_t>[nn_];
- // Kernels generally receive 3 notifications (previous kernel + 2
- // packing), but the first slice won't get notifications from previous
- // kernels.
- for (Index n = 0; n < nn_; n++)
- state_kernel_[x][m][n].store(
- (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1),
- std::memory_order_relaxed);
- }
- }
-
- // Allocate memory for packed rhs/lhs matrices.
- size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);
- size_t lhs_size =
- divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align; // bytes per packed lhs block, rounded up to a multiple of align
- size_t rhs_size =
- divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align; // bytes per packed rhs block, rounded up to a multiple of align
- packed_mem_ = static_cast<char*>(internal::aligned_malloc(
- (nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1))); // one allocation covering at most P - 1 k slices
- char* mem = static_cast<char*>(packed_mem_);
- for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) {
- packed_lhs_[x].resize(nm0_);
- for (Index m = 0; m < nm0_; m++) {
- packed_lhs_[x][m] = reinterpret_cast<LhsScalar*>(mem);
- mem += lhs_size;
- }
- packed_rhs_[x].resize(nn0_);
- for (Index n = 0; n < nn0_; n++) {
- packed_rhs_[x][n] = reinterpret_cast<RhsScalar*>(mem);
- mem += rhs_size;
- }
- }
- }
-
- ~Context() { // Releases the counter arrays and the packed scratch memory allocated by the constructor.
- for (Index x = 0; x < P; x++) {
- for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m];
- delete[] state_kernel_[x];
- }
- internal::aligned_free(packed_mem_);
- }
-
- void run() {
- // Kick off packing of the first slice.
- signal_switch(0, 1);
- // Wait for overall completion.
- // TODO(dvyukov): this wait can lead to deadlock.
- // If nthreads contractions are concurrently submitted from worker
- // threads, this wait will block all worker threads and the system will
- // deadlock.
- done_.Wait();
- }
-
- private:
- Notification done_; // notified by signal_switch() once the final slice switch has happened
- const Device& device_;
- LhsMapper& lhs_;
- RhsMapper& rhs_;
- Scalar* const buffer_;
- OutputMapper output_;
- const int num_threads_;
- const bool shard_by_col_;
- const bool parallel_pack_;
- // Matrix sizes.
- const Index m_;
- const Index n_;
- const Index k_;
- // Block sizes.
- const Index bm_;
- const Index bn_;
- const Index bk_;
- // Number of tasks.
- const Index nm_;
- const Index nn_;
- const Index nk_;
- // Task grain sizes (number of kernels executed per task).
- const Index gm_;
- const Index gn_;
- // Number of blocks (this is different from nm_/nn_ because of task size
- // coarsening).
- const Index nm0_;
- const Index nn0_;
-
- // Parallelization strategy.
- //
- // Blocks related to the same k block can run in parallel because they write
- // to different output blocks. So we parallelize within k slices, this
- // gives us parallelism level of m x n. Before we can start any kernels
- // related to k-th slice, we need to issue m lhs packing tasks and n rhs
- // packing tasks.
- //
- // However, there is a bottleneck when we are finishing kernels for k-th
- // slice (at the very end there is only 1 runnable kernel). To mitigate this
- // bottleneck we allow kernels from k-th and k+1-th slices to run in
- // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same
- // output block, so they must not run in parallel.
- //
- // This gives us the following dependency graph.
- // On each k slice we have m x n kernel tasks, m lhs packing tasks and n rhs
- // packing tasks.
- // Kernel (m, n, k) can start when:
- // - kernel (m, n, k-1) has finished
- // - lhs packing (m, k) has finished
- // - rhs packing (n, k) has finished
- // Lhs/rhs packing can start when:
- // - all k-1 packing has finished (artificially imposed to limit amount of
- // parallel packing)
- //
- // On top of that we limit runnable tasks to two consecutive k slices.
- // This is done to limit amount of memory we need for packed lhs/rhs
- // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_).
- //
- // state_switch_ tracks when we are ready to switch to the next k slice.
- // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n).
- // These variables are rolling over 3 consecutive k slices: first two we are
- // actively executing + one to track completion of kernels in the second
- // slice.
- static const Index P = 3;
- void* packed_mem_;
- std::vector<LhsScalar*> packed_lhs_[P - 1];
- std::vector<RhsScalar*> packed_rhs_[P - 1];
- std::atomic<uint8_t>** state_kernel_[P];
- // state_switch_ is frequently modified by worker threads, while other
- // fields are read-only after constructor. Let's move it to a separate cache
- // line to reduce cache-coherency traffic.
- char pad_[128];
- std::atomic<Index> state_packing_ready_[P];
- std::atomic<Index> state_switch_[P];
-
- void pack_lhs(Index m, Index k) { // Packs every lhs block of task m for slice k, then signals whatever depends on it.
- const Index mend = m * gm_ + gm(m);
- for (Index m1 = m * gm_; m1 < mend; m1++)
- LhsPacker()(packed_lhs_[k % (P - 1)][m1],
- lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1));
-
- if (!parallel_pack_ && shard_by_col_) {
- signal_packing(k);
- } else {
- signal_switch(k + 1);
- for (Index n = nn_ - 1; n >= 0; n--) signal_kernel(m, n, k, n == 0); // only the last one (n == 0) may run inline; the others are enqueued
- }
- }
-
- void pack_rhs(Index n, Index k) { // Packs every rhs block of task n for slice k, then signals whatever depends on it.
- const Index nend = n * gn_ + gn(n);
- for (Index n1 = n * gn_; n1 < nend; n1++) {
- if (k == 0) {
- // Zero the output memory in parallel.
- // On 10000x2x10000 mm zeroing can easily take half of time.
- // Zero (bn x m) row. Safe to do here because all kernels that will
- // write to this memory depend on completion of this task.
- // Note: don't call device_.memset() here. device_.memset() blocks on
- // thread pool worker thread, which can lead to underutilization and
- // deadlocks.
- memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar));
- }
- RhsPacker()(packed_rhs_[k % (P - 1)][n1],
- rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1));
- }
-
- if (parallel_pack_ || shard_by_col_) {
- signal_switch(k + 1);
- for (Index m = nm_ - 1; m >= 0; m--) signal_kernel(m, n, k, m == 0); // only the last one (m == 0) may run inline; the others are enqueued
- } else {
- signal_packing(k);
- }
- }
-
- void kernel(Index m, Index n, Index k) { // Runs the gebp kernel on every (m1, n1) block pair of task (m, n) for slice k.
- // Note: order of iteration matters here. Iteration over m is innermost
- // because we want to reuse the same packed rhs in consecutive tasks
- // (rhs fits into L2$ while lhs only into L3$).
- const Index nend = n * gn_ + gn(n);
- const Index mend = m * gm_ + gm(m);
- if (shard_by_col_) {
- for (Index n1 = n * gn_; n1 < nend; n1++) {
- for (Index m1 = m * gm_; m1 < mend; m1++)
- GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_),
- packed_lhs_[k % (P - 1)][m1],
- packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1),
- Scalar(1), -1, -1, 0, 0);
- }
- } else {
- for (Index m1 = m * gm_; m1 < mend; m1++)
- for (Index n1 = n * gn_; n1 < nend; n1++) {
- GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_),
- packed_lhs_[k % (P - 1)][m1],
- packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1),
- Scalar(1), -1, -1, 0, 0);
- }
- }
- signal_kernel(m, n, k + 1, false); // unblock the kernel that writes the same output block on the next slice
- signal_switch(k + 2);
- }
-
- void signal_packing(Index k) { // Counts down state_packing_ready_ for slice k; the last arrival re-arms it and enqueues the remaining packing side.
- eigen_assert(!parallel_pack_);
- Index s = state_packing_ready_[k % P].fetch_sub(1);
- eigen_assert(s > 0);
- if (s != 1) return;
- state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_;
- enqueue_packing(k, shard_by_col_);
- }
-
- void signal_kernel(Index m, Index n, Index k, bool sync) { // Counts down the dependency counter of kernel (m, n, k); the last arrival starts the kernel.
- std::atomic<uint8_t>* state = &state_kernel_[k % P][m][n];
- Index s = state->load();
- eigen_assert(s > 0);
- if (s != 1 && state->fetch_sub(1) != 1) return; // an observed 1 means we are the last pending dependency, so the decrement is skipped
- state->store(parallel_pack_ ? 3 : 2, std::memory_order_relaxed); // re-arm the counter for the next use of this rolling slot
- if (sync)
- kernel(m, n, k);
- else
- device_.enqueueNoNotification([=]() { kernel(m, n, k); });
- }
-
- void signal_switch(Index k, Index v = 1) { // Subtracts v from the slice-switch counter of slice k; the last arrival performs the switch.
- Index s = state_switch_[k % P].fetch_sub(v);
- eigen_assert(s >= v);
- if (s != v) return;
-
- // Ready to switch to the next k slice.
- // Reset counter for the next iteration.
- state_switch_[k % P] =
- (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) +
- nm_ * nn_;
- if (k < nk_) {
- // Issue lhs/rhs packing. Their completion will in turn kick off
- // kernels.
- if (parallel_pack_) {
- enqueue_packing(k, !shard_by_col_);
- enqueue_packing(k, shard_by_col_);
- } else if (shard_by_col_) {
- enqueue_packing(k, false);
- } else {
- enqueue_packing(k, true);
- }
-
- // Termination handling.
- // Because kernel completion signals k + 2 switch, we need to finish nk
- // + 2 slices without issuing any tasks on nk + 1 slice. So here we
- // pretend that all nk + 1 packing tasks just finish instantly; so that
- // nk + 2 switch only waits for completion of nk kernels.
- } else if (k == nk_) {
- signal_switch(k + 1,
- parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_));
- } else {
- done_.Notify();
- }
- }
-
- // Enqueue all rhs/lhs packing for k-th slice.
- void enqueue_packing(Index k, bool rhs) {
- enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs);
- }
-
- void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { // Packs a one-element range inline; otherwise splits [start, end) in half and enqueues both halves.
- if (end - start == 1) {
- if (rhs)
- pack_rhs(start, k);
- else
- pack_lhs(start, k);
- } else {
- Index mid = (start + end) / 2;
- device_.enqueueNoNotification(
- [=]() { enqueue_packing_helper(mid, end, k, rhs); });
- device_.enqueueNoNotification(
- [=]() { enqueue_packing_helper(start, mid, k, rhs); });
- }
- }
-
- // Block sizes with accounting for potentially incomplete last block.
- Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; }
- Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; }
- Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; }
- // Task grain sizes accounting for potentially incomplete last task.
- Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; }
- Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; }
-
- Context(const Context&) = delete; // non-copyable: the object owns raw allocations
- void operator=(const Context&) = delete;
- };
-
- // Picks the sharding dimension for an m x n contraction: returns true to
- // shard by columns (the default) and false to shard by rows.
- static bool shardByCol(Index m, Index n, Index num_threads) {
- // Note: both m and n are deliberately compared against Traits::nr. The
- // question is how well each of them would fit the main sharding dimension.
- const Index rows_per_thread = m / num_threads;
- const Index cols_per_thread = n / num_threads;
-
- // Rows are only considered when they carry enough data for vectorization.
- if (rows_per_thread >= Traits::nr) {
- // Not enough data for vectorization over columns: shard by rows.
- if (cols_per_thread < Traits::nr) return false;
- // Barely enough column data that does not divide evenly across threads,
- // while rows either divide evenly or dominate so much (m / n >= 6) that
- // corner effects are mitigated: shard by rows as well.
- if (cols_per_thread < 4 * Traits::nr &&
- (n % (num_threads * Traits::nr)) != 0 &&
- ((m % (num_threads * Traits::nr)) == 0 || (m / n >= 6))) {
- return false;
- }
- }
-
- // Also shard by rows when the matrix is substantially prolonged over the
- // row dimension.
- if (cols_per_thread < 16 * Traits::nr && m > n * 32) return false;
-
- return true;
- }
-
- // Searches for the grain size along m (number of kernels merged into one
- // task) for the given, fixed grain size gn along n. Candidates are tried in
- // increasing order and judged by checkGrain().
- Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn,
- int num_threads, bool shard_by_col) const {
- const Index num_kernels = divup(m, bm);
- Index accepted = 1;
- Index candidate = 1;
- Index last_task_count = num_kernels;
- while (true) {
- // Advance to the next candidate that changes the number of tasks. E.g.
- // with 10 kernels we want to try 5 and 10, but not 6, 7, 8 and 9.
- while (candidate <= num_kernels &&
- last_task_count == divup(num_kernels, candidate)) {
- ++candidate;
- }
- if (candidate > num_kernels) break;
- // Compare the candidate against the grain accepted so far.
- const int verdict = checkGrain(m, n, bm, bn, bk, candidate, gn, accepted,
- gn, num_threads, shard_by_col);
- // A negative verdict rejects this and all larger grains.
- if (verdict < 0) break;
- last_task_count = divup(num_kernels, candidate);
- // Zero means "not better": keep searching without committing.
- if (verdict != 0) accepted = candidate;
- }
- return accepted;
- }
-
- // Counterpart of coarsenM for the n dimension: searches for the grain size
- // along n for the given, fixed grain size gm along m.
- Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm,
- int num_threads, bool shard_by_col) const {
- const Index num_kernels = divup(n, bn);
- Index accepted = 1;
- Index candidate = 1;
- Index last_task_count = num_kernels;
- while (true) {
- // Skip candidates that would not change the number of tasks.
- while (candidate <= num_kernels &&
- last_task_count == divup(num_kernels, candidate)) {
- ++candidate;
- }
- if (candidate > num_kernels) break;
- const int verdict = checkGrain(m, n, bm, bn, bk, gm, candidate, gm,
- accepted, num_threads, shard_by_col);
- // A negative verdict rejects this and all larger grains.
- if (verdict < 0) break;
- last_task_count = divup(num_kernels, candidate);
- // Only a positive verdict commits the candidate.
- if (verdict != 0) accepted = candidate;
- }
- return accepted;
- }
-
- // checkGrain judges grain (gm, gn) against the current grain (oldgm, oldgn).
- // Returns 1 if the new grain should be adopted, 0 if it is acceptable but
- // not better, and -1 if it (and every larger grain) must be rejected.
- int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm,
- Index gn, Index oldgm, Index oldgn, int num_threads,
- bool shard_by_col) const {
- const TensorOpCost grain_cost =
- contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true);
- const double task_size = TensorCostModel<ThreadPoolDevice>::taskSize(
- static_cast<double>(bm) * gm * bn * gn, grain_cost);
- // Tasks that are too small are always coarsened; otherwise synchronization
- // overheads would dominate.
- if (task_size < 1) return 1;
- // Tasks that are too large are rejected, together with all larger ones.
- if (task_size > 2) return -1;
-
- // The task size is in the presumably good range, so parallelism decides.
- // Consider 12 kernels and 4 threads: grains of 2, 3 and 4 all give good
- // task sizes, but 2/4 yield 6/3 tasks, i.e. parallelism 0.75 (at most 3/4
- // of the cores are busy), while grain 3 yields 4 tasks and parallelism 1.
- const Index kernels_m = divup(m, bm);
- const Index kernels_n = divup(n, bn);
- const Index tasks_new = divup(kernels_m, gm) * divup(kernels_n, gn);
- const Index tasks_old = divup(kernels_m, oldgm) * divup(kernels_n, oldgn);
- const double parallelism_new =
- static_cast<double>(tasks_new) /
- (divup<int>(tasks_new, num_threads) * num_threads);
- const double parallelism_old =
- static_cast<double>(tasks_old) /
- (divup<int>(tasks_old, num_threads) * num_threads);
- return (parallelism_new > parallelism_old || parallelism_new == 1) ? 1 : 0;
- }
-
-#else // EIGEN_USE_SIMPLE_THREAD_POOL
-
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- void evalProduct(Scalar* buffer) const {
- if (this->m_j_size == 1) {
- this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
- return;
- }
-
- evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
- }
-
- template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- void evalGemm(Scalar* buffer) const {
- // columns in left side, rows in right side
- const Index k = this->m_k_size;
-
- // rows in left side
- const Index m = this->m_i_size;
-
- // columns in right side
- const Index n = this->m_j_size;
-
- // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar)
- this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
-
-
- const int lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;
- const int rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size;
-
- typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,
- LeftEvaluator, left_nocontract_t,
- contract_t, lhs_packet_size,
- lhs_inner_dim_contiguous,
- false, Unaligned> LhsMapper;
-
- typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,
- RightEvaluator, right_nocontract_t,
- contract_t, rhs_packet_size,
- rhs_inner_dim_contiguous,
- rhs_inner_dim_reordered, Unaligned> RhsMapper;
-
- typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-
- // TODO: packing could be faster sometimes if we supported row major tensor mappers
- typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
- Traits::LhsProgress, ColMajor> LhsPacker;
- typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;
-
- // TODO: replace false, false with conjugate values?
- typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
- Traits::mr, Traits::nr, false, false> GebpKernel;
-
- typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg;
- typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg;
-
- // initialize data mappers
- LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,
- this->m_left_contracting_strides, this->m_k_strides);
-
- RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,
- this->m_right_contracting_strides, this->m_k_strides);
-
- OutputMapper output(buffer, m);
-
- // compute block sizes (which depend on number of threads)
- const Index num_threads = this->m_device.numThreads();
- internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, num_threads);
- Index mc = blocking.mc();
- Index nc = blocking.nc();
- Index kc = blocking.kc();
- eigen_assert(mc <= m);
- eigen_assert(nc <= n);
- eigen_assert(kc <= k);
-
-#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
- const Index k_blocks = CEIL_DIV(k, kc);
- const Index n_blocks = CEIL_DIV(n, nc);
- const Index m_blocks = CEIL_DIV(m, mc);
- const Index sizeA = mc * kc;
- const Index sizeB = kc * nc;
-
- /* cout << "m: " << m << " n: " << n << " k: " << k << endl;
- cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl;
- cout << "m_blocks: " << m_blocks << " n_blocks: " << n_blocks << " k_blocks: " << k_blocks << endl;
- cout << "num threads: " << num_threads << endl;
- */
-
- // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB
- // aren't 16 byte aligned segfaults will happen due to SIMD instructions
- // note: You can get away with allocating just a single blockA and offsets and meet the
- // the alignment requirements with the assumption that
- // (Traits::mr * sizeof(ResScalar)) % 16 == 0
- const Index numBlockAs = numext::mini(num_threads, m_blocks);
- MaxSizeVector<LhsScalar *> blockAs(num_threads);
- for (int i = 0; i < num_threads; i++) {
- blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
- }
-
- // To circumvent alignment issues, I'm just going to separately allocate the memory for each thread
- // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful.
- // Other options: (1) reuse memory when a thread finishes. con: tricky
- // (2) allocate block B memory in each thread. con: overhead
- MaxSizeVector<RhsScalar *> blockBs(n_blocks);
- for (int i = 0; i < n_blocks; i++) {
- blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
- }
-
- // lhs_notifications starts with all null Notifications
- MaxSizeVector<Notification*> lhs_notifications(num_threads, nullptr);
-
- // this should really be numBlockAs * n_blocks;
- const Index num_kernel_notifications = num_threads * n_blocks;
- MaxSizeVector<Notification*> kernel_notifications(num_kernel_notifications,
- nullptr);
-
- for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) {
- const Index k_start = k_block_idx * kc;
- // make sure we don't overshoot right edge of left matrix
- const Index actual_kc = numext::mini(k_start + kc, k) - k_start;
-
- for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) {
- const Index num_blocks = numext::mini(m_blocks-m_block_idx, numBlockAs);
-
- for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) {
- const Index m_start = mt_block_idx * mc;
- const Index actual_mc = numext::mini(m_start + mc, m) - m_start;
- eigen_assert(actual_mc > 0);
-
- Index blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads;
-
- for (int i = 0; i < n_blocks; ++i) {
- Index notification_id = (blockAId * n_blocks + i);
- // Wait for any current kernels using this slot to complete
- // before using it.
- if (kernel_notifications[notification_id]) {
- wait_until_ready(kernel_notifications[notification_id]);
- delete kernel_notifications[notification_id];
- }
- kernel_notifications[notification_id] = new Notification();
- }
- const packLArg arg = {
- blockAs[blockAId], // blockA
- lhs, // lhs
- m_start, // m
- k_start, // k
- actual_mc, // mc
- actual_kc, // kc
- };
-
- // Delete any existing notification since we may be
- // replacing it. The algorithm should ensure that there are
- // no existing waiters on this notification.
- delete lhs_notifications[blockAId];
- lhs_notifications[blockAId] =
- this->m_device.enqueue(&Self::packLhs<packLArg, LhsPacker>, arg);
- }
-
- // now start kernels.
- const Index m_base_start = m_block_idx * mc;
- const bool need_to_pack = m_block_idx == 0;
-
- for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) {
- const Index n_start = n_block_idx * nc;
- const Index actual_nc = numext::mini(n_start + nc, n) - n_start;
-
- // first make sure the previous kernels are all done before overwriting rhs. Also wait if
- // we're going to start new k. In both cases need_to_pack is true.
- if (need_to_pack) {
- for (Index i = num_blocks; i < num_threads; ++i) {
- Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads;
- Index future_id = (blockAId * n_blocks + n_block_idx);
- wait_until_ready(kernel_notifications[future_id]);
- }
- }
-
- packRKArg arg = {
- &blockAs, // blockA
- blockBs[n_block_idx], // blockB
- rhs, // rhs
- output, // output
- m_base_start, // m
- k_start, // k
- n_start, // n
- mc, // mc
- actual_kc, // kc
- actual_nc, // nc
- num_threads,
- numBlockAs,
- m,
- k_block_idx,
- m_block_idx,
- n_block_idx, // n_block_idx
- m_blocks, // m_blocks
- n_blocks, // n_blocks
- &kernel_notifications, // kernel notifications
- &lhs_notifications, // lhs notifications
- need_to_pack, // need_to_pack
- };
-
- // We asynchronously kick off this function, which ends up
- // notifying the appropriate kernel_notifications objects,
- // which this thread waits on before exiting.
- this->m_device.enqueueNoNotification(&Self::packRhsAndKernel<packRKArg, RhsPacker, GebpKernel>, arg);
- }
- }
- }
-
- // Make sure all the kernels are done.
- for (size_t i = 0; i < kernel_notifications.size(); ++i) {
- wait_until_ready(kernel_notifications[i]);
- delete kernel_notifications[i];
- }
-
- // No need to wait for lhs notifications since they should have
- // already been waited on. Just clean them up.
- for (size_t i = 0; i < lhs_notifications.size(); ++i) {
- delete lhs_notifications[i];
- }
-
- // deallocate all of the memory for both A and B's
- for (size_t i = 0; i < blockAs.size(); i++) {
- this->m_device.deallocate(blockAs[i]);
- }
- for (size_t i = 0; i < blockBs.size(); i++) {
- this->m_device.deallocate(blockBs[i]);
- }
-
-#undef CEIL_DIV
- }
-
- /*
- * Packs a LHS block of size (mt, kc) starting at lhs(m, k). Before packing
- * the LHS block, check that all of the kernels that worked on the same
- * mt_block_idx in the previous m_block are done.
- */
- template <typename packLArg, typename LhsPacker>
- static void packLhs(const packLArg arg) {
- // perform actual packing
- LhsPacker pack_lhs;
- pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc);
- }
-
- /*
- * Packs a RHS block of size (kc, nc) starting at (k, n) after checking that
- * all kernels in the previous block are done.
- * Then for each LHS future, we wait on the future and then call GEBP
- * on the area packed by the future (which starts at
- * blockA + future_idx * mt * kc) on the LHS and with the full packed
- * RHS block.
- * The output of this GEBP is written to output(m + i * mt, n).
- */
- template <typename packRKArg, typename RhsPacker, typename GebpKernel>
- static void packRhsAndKernel(packRKArg arg) {
- if (arg.need_to_pack) {
- RhsPacker pack_rhs;
- pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc);
- }
-
- GebpKernel gebp;
- for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) {
- const Index m_base_start = arg.m + arg.mc*mt_block_idx;
- if (m_base_start < arg.max_m) {
- Index blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads;
- wait_until_ready((*arg.lhs_notifications)[blockAId]);
- const Index actual_mc = numext::mini(m_base_start + arg.mc, arg.max_m) - m_base_start;
- gebp(arg.output.getSubMapper(m_base_start, arg.n),
- (*arg.blockAs)[blockAId], arg.blockB,
- actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0);
-
- // Notify that the kernel is done.
- const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx;
- (*arg.kernel_notifications)[set_idx]->Notify();
- }
- }
- }
-#endif // EIGEN_USE_SIMPLE_THREAD_POOL
-
- TensorOpCost contractionCost(Index m, Index n, Index bm, Index bn, Index bk,
- bool shard_by_col, bool prepacked) const {
- const int packed_size = std::min<int>(PacketType<LhsScalar, Device>::size,
- PacketType<RhsScalar, Device>::size);
- const int output_packet_size = internal::unpacket_traits<PacketReturnType>::size;
- const double kd = static_cast<double>(bk);
- // Peak VFMA bandwidth is 0.5. However if we have not enough data for
- // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined
- // experimentally.
- double computeBandwidth = bk == 1 ? 4.0 :
- (shard_by_col ? bn : bm) < Traits::nr ||
- (shard_by_col ? bm : bn) < Traits::mr ? 2.0 : 0.5;
-#ifndef EIGEN_VECTORIZE_FMA
- // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors.
- // However for MULPS/ADDPS we have dependent sequence of 2 such instructions,
- // so overall bandwidth is 1.0.
- if (computeBandwidth == 0.5) computeBandwidth = 1.0;
-#endif
- // Computations.
- TensorOpCost cost = TensorOpCost(0, 0, kd * computeBandwidth, true, packed_size);
- // Output stores.
- cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size);
- if (prepacked) {
- // Packing and kernels are executed in different tasks. When we calculate
- // task grain size we look only at kernel cost assuming that kernel
- // is more expensive than packing.
- return cost;
- }
- // Lhs/rhs loads + computations.
- TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n);
- TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m);
- // Lhs packing memory cost does not contribute considerably to overall
- // execution time because lhs is prefetched early and accessed sequentially.
- if (shard_by_col)
- lhsCost.dropMemoryCost();
- else
- rhsCost.dropMemoryCost();
- return cost + lhsCost + rhsCost;
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_USE_THREADS
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
deleted file mode 100644
index 860a694..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ /dev/null
@@ -1,279 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
-
-namespace Eigen {
-
-/** \class TensorConversionOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor conversion class. This class makes it possible to vectorize
- * type casting operations when the number of scalars per packet in the source
- * and the destination type differ
- */
-namespace internal {
-template<typename TargetType, typename XprType>
-struct traits<TensorConversionOp<TargetType, XprType> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef TargetType Scalar;
- typedef typename traits<XprType>::StorageKind StorageKind;
- typedef typename traits<XprType>::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = traits<XprType>::NumDimensions;
- static const int Layout = traits<XprType>::Layout;
- enum { Flags = 0 };
-};
-
-template<typename TargetType, typename XprType>
-struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
-{
- typedef const TensorConversionOp<TargetType, XprType>& type;
-};
-
-template<typename TargetType, typename XprType>
-struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
-{
- typedef TensorConversionOp<TargetType, XprType> type;
-};
-
-} // end namespace internal
-
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
-struct PacketConverter {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketConverter(const TensorEvaluator& impl)
- : m_impl(impl) {}
-
- template<int LoadMode, typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
- return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
- }
-
- private:
- const TensorEvaluator& m_impl;
-};
-
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketConverter(const TensorEvaluator& impl)
- : m_impl(impl) {}
-
- template<int LoadMode, typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
- const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-
- SrcPacket src1 = m_impl.template packet<LoadMode>(index);
- SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
- TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
- return result;
- }
-
- private:
- const TensorEvaluator& m_impl;
-};
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketConverter(const TensorEvaluator& impl)
- : m_impl(impl) {}
-
- template<int LoadMode, typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
- const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-
- SrcPacket src1 = m_impl.template packet<LoadMode>(index);
- SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
- SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
- SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
- TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
- return result;
- }
-
- private:
- const TensorEvaluator& m_impl;
-};
-
-template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketConverter(const TensorEvaluator& impl)
- : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
-
- template<int LoadMode, typename Index>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
- const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
- // Only call m_impl.packet() when we have direct access to the underlying data. This
- // ensures that we don't compute the subexpression twice. We may however load some
- // coefficients twice, but in practice this doesn't negatively impact performance.
- if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
- // Force unaligned memory loads since we can't ensure alignment anymore
- return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
- } else {
- const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
- typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
- typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
- internal::scalar_cast_op<SrcType, TgtType> converter;
- EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
- for (int i = 0; i < TgtPacketSize; ++i) {
- values[i] = converter(m_impl.coeff(index+i));
- }
- TgtPacket rslt = internal::pload<TgtPacket>(values);
- return rslt;
- }
- }
-
- private:
- const TensorEvaluator& m_impl;
- const typename TensorEvaluator::Index m_maxIndex;
-};
-
-template<typename TargetType, typename XprType>
-class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
- typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
- typedef typename internal::traits<TensorConversionOp>::Index Index;
- typedef typename internal::nested<TensorConversionOp>::type Nested;
- typedef Scalar CoeffReturnType;
- typedef typename NumTraits<Scalar>::Real RealScalar;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
- : m_xpr(xpr) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
-};
-
-template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
- impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-};
-
-template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
- return impl.evalSubExprsIfNeeded(data);
- }
-};
-
-
-// Eval as rvalue
-template<typename TargetType, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
-{
- typedef TensorConversionOp<TargetType, ArgType> XprType;
- typedef typename XprType::Index Index;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
- typedef TargetType Scalar;
- typedef TargetType CoeffReturnType;
- typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename PacketType<SrcType, Device>::type PacketSourceType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = true,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
- {
- return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
- {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- internal::scalar_cast_op<SrcType, TargetType> converter;
- return converter(m_impl.coeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
- internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
- return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
- if (vectorized) {
- const double SrcCoeffRatio =
- internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
- const double TgtCoeffRatio =
- internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
- return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
- TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
- } else {
- return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
- }
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- template <int LoadMode, bool ActuallyVectorize>
- struct PacketConv {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- internal::scalar_cast_op<SrcType, TargetType> converter;
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = converter(impl.coeff(index+i));
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- };
-
- template <int LoadMode>
- struct PacketConv<LoadMode, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
- const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
- PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
- SrcCoeffRatio, TgtCoeffRatio> converter(impl);
- return converter.template packet<LoadMode>(index);
- }
- };
-
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
deleted file mode 100644
index abdf742..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ /dev/null
@@ -1,1104 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
-
-namespace Eigen {
-
-/** \class TensorConvolution
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor convolution class.
- *
- *
- */
-namespace internal {
-
-template <typename Index, typename InputDims, int NumKernelDims, int Layout>
-class IndexMapper {
- public:
- IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
- const array<Index, NumKernelDims>& indices) {
-
- array<Index, NumDims> dimensions = input_dims;
- for (int i = 0; i < NumKernelDims; ++i) {
- const Index index = indices[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- dimensions[index] = result_dim;
- }
-
- array<Index, NumDims> inputStrides;
- array<Index, NumDims> outputStrides;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- inputStrides[0] = 1;
- outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- inputStrides[i] = inputStrides[i-1] * input_dims[i-1];
- outputStrides[i] = outputStrides[i-1] * dimensions[i-1];
- }
- } else {
- inputStrides[NumDims - 1] = 1;
- outputStrides[NumDims - 1] = 1;
- for (int i = static_cast<int>(NumDims) - 2; i >= 0; --i) {
- inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
- outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1];
- }
- }
-
- array<Index, NumDims> cudaInputDimensions;
- array<Index, NumDims> cudaOutputDimensions;
- array<Index, NumDims> tmp = dimensions;
- array<Index, NumDims> ordering;
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- for (int i = 0; i < NumKernelDims; ++i) {
- const Index index = i + offset;
- ordering[index] = indices[i];
- tmp[indices[i]] = -1;
- cudaInputDimensions[index] = input_dims[indices[i]];
- cudaOutputDimensions[index] = dimensions[indices[i]];
- }
-
- int written = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? NumKernelDims
- : 0;
- for (int i = 0; i < NumDims; ++i) {
- if (tmp[i] >= 0) {
- ordering[written] = i;
- cudaInputDimensions[written] = input_dims[i];
- cudaOutputDimensions[written] = dimensions[i];
- ++written;
- }
- }
-
- for (int i = 0; i < NumDims; ++i) {
- m_inputStrides[i] = inputStrides[ordering[i]];
- m_outputStrides[i] = outputStrides[ordering[i]];
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumDims; ++i) {
- if (i > NumKernelDims) {
- m_cudaInputStrides[i] =
- m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1];
- m_cudaOutputStrides[i] =
- m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1];
- } else {
- m_cudaInputStrides[i] = 1;
- m_cudaOutputStrides[i] = 1;
- }
- }
- } else {
- for (int i = NumDims - 1; i >= 0; --i) {
- if (i + 1 < offset) {
- m_cudaInputStrides[i] =
- m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1];
- m_cudaOutputStrides[i] =
- m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1];
- } else {
- m_cudaInputStrides[i] = 1;
- m_cudaOutputStrides[i] = 1;
- }
- }
- }
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const {
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
- const Index idx = p / m_cudaInputStrides[d];
- inputIndex += idx * m_inputStrides[d];
- p -= idx * m_cudaInputStrides[d];
- }
- inputIndex += p * m_inputStrides[NumKernelDims];
- } else {
- std::ptrdiff_t limit = 0;
- if (NumKernelDims < NumDims) {
- limit = NumDims - NumKernelDims - 1;
- }
- for (int d = 0; d < limit; ++d) {
- const Index idx = p / m_cudaInputStrides[d];
- inputIndex += idx * m_inputStrides[d];
- p -= idx * m_cudaInputStrides[d];
- }
- inputIndex += p * m_inputStrides[limit];
- }
- return inputIndex;
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const {
- Index outputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int d = NumDims - 1; d > NumKernelDims; --d) {
- const Index idx = p / m_cudaOutputStrides[d];
- outputIndex += idx * m_outputStrides[d];
- p -= idx * m_cudaOutputStrides[d];
- }
- outputIndex += p * m_outputStrides[NumKernelDims];
- } else {
- std::ptrdiff_t limit = 0;
- if (NumKernelDims < NumDims) {
- limit = NumDims - NumKernelDims - 1;
- }
- for (int d = 0; d < limit; ++d) {
- const Index idx = p / m_cudaOutputStrides[d];
- outputIndex += idx * m_outputStrides[d];
- p -= idx * m_cudaOutputStrides[d];
- }
- outputIndex += p * m_outputStrides[limit];
- }
- return outputIndex;
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_inputStrides[offset];
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_outputStrides[offset];
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1];
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1];
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] +
- k * m_inputStrides[offset + 2];
- }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const {
- const size_t offset = static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : NumDims - NumKernelDims;
- return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] +
- k * m_outputStrides[offset + 2];
- }
-
- private:
- static const int NumDims = internal::array_size<InputDims>::value;
- array<Index, NumDims> m_inputStrides;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims> m_cudaInputStrides;
- array<Index, NumDims> m_cudaOutputStrides;
-};
-
-
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename promote_storage_type<typename InputXprType::Scalar,
- typename KernelXprType::Scalar>::ret Scalar;
- typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
- typename traits<KernelXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<InputXprType>::Index,
- typename traits<KernelXprType>::Index>::type Index;
- typedef typename InputXprType::Nested LhsNested;
- typedef typename KernelXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const int NumDimensions = traits<InputXprType>::NumDimensions;
- static const int Layout = traits<InputXprType>::Layout;
-
- enum {
- Flags = 0
- };
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
-{
- typedef const TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>& type;
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct nested<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >::type>
-{
- typedef TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename Indices, typename InputXprType, typename KernelXprType>
-class TensorConvolutionOp : public TensorBase<TensorConvolutionOp<Indices, InputXprType, KernelXprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
- typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorConvolutionOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorConvolutionOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
- : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Indices& indices() const { return m_indices; }
-
- /** \returns the nested expressions */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const typename internal::remove_all<typename InputXprType::Nested>::type&
- inputExpression() const { return m_input_xpr; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const typename internal::remove_all<typename KernelXprType::Nested>::type&
- kernelExpression() const { return m_kernel_xpr; }
-
- protected:
- typename InputXprType::Nested m_input_xpr;
- typename KernelXprType::Nested m_kernel_xpr;
- const Indices m_indices;
-};
-
-
-template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
-struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
-{
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
-
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
- static const int NumKernelDims = internal::array_size<Indices>::value;
- typedef typename XprType::Index Index;
- typedef DSizes<Index, NumDims> Dimensions;
-
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<InputArgType, Device>::PacketAccess & TensorEvaluator<KernelArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<InputArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
- const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStride[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
- }
- } else {
- m_inputStride[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
- }
- }
-
- m_dimensions = m_inputImpl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumKernelDims; ++i) {
- const Index index = op.indices()[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- m_dimensions[index] = result_dim;
- if (i > 0) {
- m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
- } else {
- m_kernelStride[0] = 1;
- }
- m_indexStride[i] = m_inputStride[index];
- }
-
- m_outputStride[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
- }
- } else {
- for (int i = NumKernelDims - 1; i >= 0; --i) {
- const Index index = op.indices()[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- m_dimensions[index] = result_dim;
- if (i < NumKernelDims - 1) {
- m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
- } else {
- m_kernelStride[NumKernelDims - 1] = 1;
- }
- m_indexStride[i] = m_inputStride[index];
- }
-
- m_outputStride[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- m_inputImpl.evalSubExprsIfNeeded(NULL);
- preloadKernel();
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_inputImpl.cleanup();
- if (m_local_kernel) {
- m_device.deallocate((void*)m_kernel);
- m_local_kernel = false;
- }
- m_kernel = NULL;
- }
-
- void evalTo(typename XprType::Scalar* buffer) {
- evalSubExprsIfNeeded(NULL);
- for (int i = 0; i < dimensions().TotalSize(); ++i) {
- buffer[i] += coeff(i);
- }
- cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- CoeffReturnType result = CoeffReturnType(0);
- convolve(firstInput(index), 0, NumKernelDims-1, result);
- return result;
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const
- {
- Index indices[2] = {index, index+PacketSize-1};
- Index startInputs[2] = {0, 0};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_outputStride[i];
- const Index idx1 = indices[1] / m_outputStride[i];
- startInputs[0] += idx0 * m_inputStride[i];
- startInputs[1] += idx1 * m_inputStride[i];
- indices[0] -= idx0 * m_outputStride[i];
- indices[1] -= idx1 * m_outputStride[i];
- }
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx0 = indices[0] / m_outputStride[i];
- const Index idx1 = indices[1] / m_outputStride[i];
- startInputs[0] += idx0 * m_inputStride[i];
- startInputs[1] += idx1 * m_inputStride[i];
- indices[0] -= idx0 * m_outputStride[i];
- indices[1] -= idx1 * m_outputStride[i];
- }
- }
- startInputs[0] += indices[0];
- startInputs[1] += indices[1];
-
- if (startInputs[1]-startInputs[0] == PacketSize-1) {
- PacketReturnType result = internal::pset1<PacketReturnType>(0);
- convolvePacket(startInputs[0], 0, NumKernelDims-1, result);
- return result;
- } else {
- EIGEN_ALIGN_MAX Scalar data[PacketSize];
- data[0] = Scalar(0);
- convolve(startInputs[0], 0, NumKernelDims-1, data[0]);
- for (int i = 1; i < PacketSize-1; ++i) {
- data[i] = Scalar(0);
- convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]);
- }
- data[PacketSize-1] = Scalar(0);
- convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]);
- return internal::pload<PacketReturnType>(data);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
- // We ignore the use of fused multiply-add.
- const double convolve_compute_cost =
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
- const double firstIndex_compute_cost =
- NumDims *
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>());
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
- m_kernelImpl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
- PacketSize));
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- private:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
- Index startInput = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStride[i];
- startInput += idx * m_inputStride[i];
- index -= idx * m_outputStride[i];
- }
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStride[i];
- startInput += idx * m_inputStride[i];
- index -= idx * m_outputStride[i];
- }
- }
- startInput += index;
- return startInput;
- }
-
- EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const {
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
- const Index input = firstIndex + j * m_indexStride[DimIndex];
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
- if (DimIndex > 0) {
- convolve(input, kernel, DimIndex-1, accum);
- } else {
- accum += m_inputImpl.coeff(input) * m_kernel[kernel];
- }
- }
- }
-
- template <typename Packet>
- EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const {
- for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) {
- const Index input = firstIndex + j * m_indexStride[DimIndex];
- const Index kernel = firstKernel + j * m_kernelStride[DimIndex];
- if (DimIndex > 0) {
- convolvePacket(input, kernel, DimIndex-1, accum);
- } else {
- accum = internal::pmadd<Packet>(m_inputImpl.template packet<Unaligned>(input), internal::pset1<Packet>(m_kernel[kernel]), accum);
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() {
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
- // expression that needs to be evaluated)
- const Scalar* in_place = m_kernelImpl.data();
- if (in_place) {
- m_kernel = in_place;
- m_local_kernel = false;
- } else {
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
- Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
- EvalTo evalToTmp(local, m_kernelArg);
- const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
-
- m_kernel = local;
- m_local_kernel = true;
- }
- }
-
- array<Index, NumDims> m_inputStride;
- array<Index, NumDims> m_outputStride;
-
- array<Index, NumKernelDims> m_indexStride;
- array<Index, NumKernelDims> m_kernelStride;
- TensorEvaluator<InputArgType, Device> m_inputImpl;
- TensorEvaluator<KernelArgType, Device> m_kernelImpl;
- Dimensions m_dimensions;
-
- KernelArgType m_kernelArg;
- const Scalar* m_kernel;
- bool m_local_kernel;
- const Device& m_device;
-};
-
-
-
-
-// Use an optimized implementation of the evaluation code for GPUs whenever possible.
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-
-template <int StaticKernelSize>
-struct GetKernelSize {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const {
- return StaticKernelSize;
- }
-};
-template <>
-struct GetKernelSize<Dynamic> {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const {
- return kernelSize;
- }
-};
-
-template <typename InputEvaluator, typename Index, typename InputDims,
- int StaticKernelSize>
-__global__ void EigenConvolutionKernel1D(
- InputEvaluator eval,
- const internal::IndexMapper<Index, InputDims, 1, InputEvaluator::Layout>
- indexMapper,
- const float* __restrict kernel, const int numPlanes, const int numX,
- const int maxX, const int kernelSize, float* buffer) {
- extern __shared__ float s[];
-
- const int first_x = blockIdx.x * maxX;
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSize>()(kernelSize);
- const int num_x_output = last_x - first_x + 1;
-
- const int first_plane = blockIdx.y * blockDim.y;
- const int plane_stride = blockDim.y * gridDim.y;
-
- for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) {
- // Load inputs to shared memory
- const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
- const int plane_kernel_offset = threadIdx.y * num_x_input;
- #pragma unroll
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
- const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x);
- s[i + plane_kernel_offset] = eval.coeff(tensor_index);
- }
-
- __syncthreads();
-
- // Compute the convolution
- const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
- #pragma unroll
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
- const int kernel_offset = plane_kernel_offset + i;
- float result = 0.0f;
- #pragma unroll
- for (int k = 0; k < GetKernelSize<StaticKernelSize>()(kernelSize); ++k) {
- result += s[k + kernel_offset] * kernel[k];
- }
- const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x);
- buffer[tensor_index] = result;
- }
- __syncthreads();
- }
-};
-
-template <typename InputEvaluator, typename Index, typename InputDims,
- int StaticKernelSizeX, int StaticKernelSizeY>
-__global__ void EigenConvolutionKernel2D(
- InputEvaluator eval,
- const internal::IndexMapper<Index, InputDims, 2, InputEvaluator::Layout>
- indexMapper,
- const float* __restrict kernel, const int numPlanes, const int numX,
- const int maxX, const int numY, const int maxY, const int kernelSizeX,
- const int kernelSizeY, float* buffer) {
- extern __shared__ float s[];
-
- const int first_x = blockIdx.x * maxX;
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
- const int num_x_input = last_x - first_x + GetKernelSize<StaticKernelSizeX>()(kernelSizeX);
- const int num_x_output = last_x - first_x + 1;
-
- const int first_y = blockIdx.y * maxY;
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
- const int num_y_input = last_y - first_y + GetKernelSize<StaticKernelSizeY>()(kernelSizeY);
- const int num_y_output = last_y - first_y + 1;
-
- const int first_plane = blockIdx.z * blockDim.z;
- const int plane_stride = blockDim.z * gridDim.z;
-
- for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) {
-
- const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
- const int plane_kernel_offset = threadIdx.z * num_y_input;
-
- // Load inputs to shared memory
- #pragma unroll
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
- const int input_offset = num_x_input * (j + plane_kernel_offset);
- #pragma unroll
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
- const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y);
- s[i + input_offset] = eval.coeff(tensor_index);
- }
- }
-
- __syncthreads();
-
- // Convolution
- const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
- #pragma unroll
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
- #pragma unroll
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
- float result = 0.0f;
- #pragma unroll
- for (int l = 0; l < GetKernelSize<StaticKernelSizeY>()(kernelSizeY); ++l) {
- const int kernel_offset = kernelSizeX * l;
- const int input_offset = i + num_x_input * (j + l + plane_kernel_offset);
- #pragma unroll
- for (int k = 0; k < GetKernelSize<StaticKernelSizeX>()(kernelSizeX); ++k) {
- result += s[k + input_offset] * kernel[k + kernel_offset];
- }
- }
- const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y);
- buffer[tensor_index] = result;
- }
- }
-
- __syncthreads();
- }
-};
-
-template <typename InputEvaluator, typename Index, typename InputDims>
-__global__ void EigenConvolutionKernel3D(
- InputEvaluator eval,
- const internal::IndexMapper<Index, InputDims, 3, InputEvaluator::Layout>
- indexMapper,
- const float* __restrict kernel, const size_t numPlanes, const size_t numX,
- const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ,
- const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY,
- const size_t kernelSizeZ, float* buffer) {
- extern __shared__ float s[];
-
- // Load inputs to shared memory
- const int first_x = blockIdx.x * maxX;
- const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1;
- const int num_x_input = last_x - first_x + kernelSizeX;
-
- const int first_y = blockIdx.y * maxY;
- const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1;
- const int num_y_input = last_y - first_y + kernelSizeY;
-
- const int first_z = blockIdx.z * maxZ;
- const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1;
- const int num_z_input = last_z - first_z + kernelSizeZ;
-
- for (int p = 0; p < numPlanes; ++p) {
-
- const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p);
- const int plane_kernel_offset = 0;
-
- for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) {
- for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) {
- for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) {
- const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z);
- s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index);
- }
- }
- }
-
- __syncthreads();
-
- // Convolution
- const int num_z_output = last_z - first_z + 1;
- const int num_y_output = last_y - first_y + 1;
- const int num_x_output = last_x - first_x + 1;
- const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p);
-
- for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) {
- for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) {
- for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) {
- float result = 0.0f;
- for (int n = 0; n < kernelSizeZ; ++n) {
- for (int m = 0; m < kernelSizeY; ++m) {
- for (int l = 0; l < kernelSizeX; ++l) {
- result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)];
- }
- }
- }
- const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z);
- buffer[tensor_index] = result;
- }
- }
- }
- __syncthreads();
- }
-};
-
-
-
-template<typename Indices, typename InputArgType, typename KernelArgType>
-struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, GpuDevice>
-{
- typedef TensorConvolutionOp<Indices, InputArgType, KernelArgType> XprType;
-
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions>::value;
- static const int NumKernelDims = internal::array_size<Indices>::value;
- typedef typename XprType::Index Index;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions KernelDimensions;
-
- enum {
- IsAligned = TensorEvaluator<InputArgType, GpuDevice>::IsAligned & TensorEvaluator<KernelArgType, GpuDevice>::IsAligned,
- PacketAccess = false,
- Layout = TensorEvaluator<InputArgType, GpuDevice>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
- : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions& input_dims = m_inputImpl.dimensions();
- const typename TensorEvaluator<KernelArgType, GpuDevice>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
-
- m_dimensions = m_inputImpl.dimensions();
- for (int i = 0; i < NumKernelDims; ++i) {
- const Index index = op.indices()[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- m_dimensions[index] = result_dim;
- }
- }
-
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, GpuDevice>::type PacketReturnType;
- typedef typename InputArgType::Scalar Scalar;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
- preloadKernel();
- m_inputImpl.evalSubExprsIfNeeded(NULL);
- if (data) {
- executeEval(data);
- return false;
- } else {
- m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar));
- executeEval(m_buf);
- return true;
- }
- }
-
- EIGEN_STRONG_INLINE void cleanup() {
- m_inputImpl.cleanup();
- if (m_buf) {
- m_device.deallocate(m_buf);
- m_buf = NULL;
- }
- if (m_local_kernel) {
- m_device.deallocate((void*)m_kernel);
- m_local_kernel = false;
- }
- m_kernel = NULL;
- }
-
- EIGEN_STRONG_INLINE void preloadKernel() {
- // Don't make a local copy of the kernel unless we have to (i.e. it's an
- // expression that needs to be evaluated)
- const Scalar* in_place = m_kernelImpl.data();
- if (in_place) {
- m_kernel = in_place;
- m_local_kernel = false;
- } else {
- size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
- Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
- typedef TensorEvalToOp<const KernelArgType> EvalTo;
- EvalTo evalToTmp(local, m_kernelArg);
- const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
- internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
-
- m_kernel = local;
- m_local_kernel = true;
- }
- }
-
/// Integer division rounded up: the smallest q such that q * denom >= num.
/// `denom` must be non-zero.
static unsigned int ceil(unsigned int num, unsigned int denom) {
  const unsigned int quotient = num / denom;
  const bool has_remainder = (num % denom) != 0;
  return has_remainder ? quotient + 1 : quotient;
}
-
- void executeEval(Scalar* data) const {
- typedef typename TensorEvaluator<InputArgType, GpuDevice>::Dimensions InputDims;
-
- const int maxSharedMem = m_device.sharedMemPerBlock();
- const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock();
- const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock;
- const int numMultiProcessors = m_device.getNumCudaMultiProcessors();
- const int warpSize = 32;
-
- switch (NumKernelDims) {
- case 1: {
- const int kernel_size = m_kernelImpl.dimensions().TotalSize();
-
- const int numX = dimensions()[m_indices[0]];
- const int numP = dimensions().TotalSize() / numX;
- int maxX;
- dim3 block_size;
-
- const int single_stride_dim =
- static_cast<int>(Layout) == static_cast<int>(ColMajor)
- ? 0
- : m_inputImpl.dimensions().rank() - 1;
- if (m_indices[0] == single_stride_dim) {
- // Maximum the reuse
- const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32;
- maxX = numext::mini<int>(inner_dim, numX);
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP);
- block_size.x = numext::mini(maxThreadsPerBlock, maxX);
- block_size.y = numext::mini<int>(maxThreadsPerBlock / block_size.x, maxP);
- }
- else {
- // Read as much as possible alongside the inner most dimension, that is the plane
- const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar));
- const int maxP = numext::mini<int>(inner_dim, numP);
- maxX = numext::mini<int>(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX);
-
- block_size.x = numext::mini(warpSize, maxX);
- block_size.y = numext::mini<int>(maxThreadsPerBlock/block_size.x, maxP);
- }
-
- const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
-
- const int num_x_blocks = ceil(numX, maxX);
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
- const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks);
-
- dim3 num_blocks(num_x_blocks, numext::mini<int>(num_y_blocks, ceil(numP, block_size.y)));
-
-
- //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
-
- const array<Index, 1> indices(m_indices[0]);
- const array<Index, 1> kernel_dims(m_kernelImpl.dimensions()[0]);
- internal::IndexMapper<Index, InputDims, 1, Layout> indexMapper(
- m_inputImpl.dimensions(), kernel_dims, indices);
- switch(kernel_size) {
- case 4: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data);
- break;
- }
- case 7: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data);
- break;
- }
- default: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data);
- }
- }
- break;
- }
-
- case 2: {
- const int idxX =
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 1;
- const int idxY =
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 0;
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
-
- const int numX = dimensions()[m_indices[idxX]];
- const int numY = dimensions()[m_indices[idxY]];
- const int numP = dimensions().TotalSize() / (numX*numY);
-
- const float scaling_factor = sqrtf(static_cast<float>(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x));
-
- // Snap maxX to warp size
- int inner_dim = ((static_cast<int>(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32;
- const int maxX = numext::mini<int>(inner_dim, numX);
- const int maxY = numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY);
- const int maxP = numext::mini<int>(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP);
-
- dim3 block_size;
- block_size.x = numext::mini(1024, maxX);
- block_size.y = numext::mini<int>(1024/block_size.x, maxY);
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxP);
-
- const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
-
- const int num_x_blocks = ceil(numX, maxX);
- const int num_y_blocks = ceil(numY, maxY);
- const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem);
- const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks);
-
- dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini<int>(num_z_blocks, ceil(numP, block_size.z)));
-
-
- //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
-
- const array<Index, 2> indices(m_indices[idxX], m_indices[idxY]);
- const array<Index, 2> kernel_dims(m_kernelImpl.dimensions()[idxX],
- m_kernelImpl.dimensions()[idxY]);
- internal::IndexMapper<Index, InputDims, 2, Layout> indexMapper(
- m_inputImpl.dimensions(), kernel_dims, indices);
- switch (kernel_size_x) {
- case 4: {
- switch (kernel_size_y) {
- case 7: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data);
- break;
- }
- default: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data);
- break;
- }
- }
- break;
- }
- case 7: {
- switch (kernel_size_y) {
- case 4: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data);
- break;
- }
- default: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data);
- break;
- }
- }
- break;
- }
- default: {
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data);
- break;
- }
- }
- break;
- }
-
- case 3: {
- const int idxX =
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : 2;
- const int idxY =
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 1;
- const int idxZ =
- static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 0;
-
- const int kernel_size_x = m_kernelImpl.dimensions()[idxX];
- const int kernel_size_y = m_kernelImpl.dimensions()[idxY];
- const int kernel_size_z = m_kernelImpl.dimensions()[idxZ];
-
- const int numX = dimensions()[m_indices[idxX]];
- const int numY = dimensions()[m_indices[idxY]];
- const int numZ = dimensions()[m_indices[idxZ]];
- const int numP = dimensions().TotalSize() / (numX*numY*numZ);
-
- const int maxX = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX));
- const int maxY = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY));
- const int maxZ = numext::mini<int>(128, numext::mini<int>(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ));
-
- dim3 block_size;
- block_size.x = numext::mini(32, maxX);
- block_size.y = numext::mini(32, maxY);
- block_size.z = numext::mini<int>(1024/(block_size.x*block_size.y), maxZ);
- dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ));
-
- const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar);
- assert(shared_mem <= maxSharedMem);
-
- //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl;
- const array<Index, 3> indices(m_indices[idxX], m_indices[idxY],
- m_indices[idxZ]);
- const array<Index, 3> kernel_dims(m_kernelImpl.dimensions()[idxX],
- m_kernelImpl.dimensions()[idxY],
- m_kernelImpl.dimensions()[idxZ]);
- internal::IndexMapper<Index, InputDims, 3, Layout> indexMapper(
- m_inputImpl.dimensions(), kernel_dims, indices);
-
- LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D<TensorEvaluator<InputArgType, GpuDevice>, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data);
- break;
- }
-
- default: {
- EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE);
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- eigen_assert(m_buf);
- eigen_assert(index < m_dimensions.TotalSize());
- return m_buf[index];
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
- {
- eigen_assert(m_buf);
- eigen_assert(index < m_dimensions.TotalSize());
- return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
- // model.
- const double kernel_size = m_kernelImpl.dimensions().TotalSize();
- // We ignore the use of fused multiply-add.
- const double convolve_compute_cost =
- TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
- const double firstIndex_compute_cost =
- NumDims *
- (2 * TensorOpCost::AddCost<Index>() + 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>());
- return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) +
- kernel_size * (m_inputImpl.costPerCoeff(vectorized) +
- m_kernelImpl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, convolve_compute_cost, vectorized,
- PacketSize));
- }
-
- private:
- // No assignment (copies are needed by the kernels)
- TensorEvaluator& operator = (const TensorEvaluator&);
-
- TensorEvaluator<InputArgType, GpuDevice> m_inputImpl;
- TensorEvaluator<KernelArgType, GpuDevice> m_kernelImpl;
- KernelArgType m_kernelArg;
- Indices m_indices;
- Dimensions m_dimensions;
- Scalar* m_buf;
- const Scalar* m_kernel;
- bool m_local_kernel;
-
- const GpuDevice& m_device;
-};
-#endif
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
deleted file mode 100644
index 83c449c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
+++ /dev/null
@@ -1,212 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
-#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
-
-namespace Eigen {
-
-/** \class TensorEvaluator
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A cost model used to limit the number of threads used for evaluating
- * tensor expression.
- *
- */
-
-// Class storing the cost of evaluating a tensor expression in terms of the
-// estimated number of operand bytes loads, bytes stored, and compute cycles.
-class TensorOpCost {
- public:
- // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple
- // model based on minimal reciprocal throughput numbers from Intel or
- // Agner Fog's tables would be better than what is there now.
- template <typename ArgType>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() {
- return internal::functor_traits<
- internal::scalar_product_op<ArgType, ArgType> >::Cost;
- }
- template <typename ArgType>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() {
- return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost;
- }
- template <typename ArgType>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() {
- return internal::functor_traits<
- internal::scalar_quotient_op<ArgType, ArgType> >::Cost;
- }
- template <typename ArgType>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() {
- return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost;
- }
- template <typename SrcType, typename TargetType>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() {
- return internal::functor_traits<
- internal::scalar_cast_op<SrcType, TargetType> >::Cost;
- }
-
- EIGEN_DEVICE_FUNC
- TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {}
- EIGEN_DEVICE_FUNC
- TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles)
- : bytes_loaded_(bytes_loaded),
- bytes_stored_(bytes_stored),
- compute_cycles_(compute_cycles) {}
-
- EIGEN_DEVICE_FUNC
- TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles,
- bool vectorized, double packet_size)
- : bytes_loaded_(bytes_loaded),
- bytes_stored_(bytes_stored),
- compute_cycles_(vectorized ? compute_cycles / packet_size
- : compute_cycles) {
- eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded));
- eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored));
- eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const {
- return bytes_loaded_;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const {
- return bytes_stored_;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const {
- return compute_cycles_;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost(
- double load_cost, double store_cost, double compute_cost) const {
- return load_cost * bytes_loaded_ + store_cost * bytes_stored_ +
- compute_cost * compute_cycles_;
- }
-
- // Drop memory access component. Intended for cases when memory accesses are
- // sequential or are completely masked by computations.
- EIGEN_DEVICE_FUNC void dropMemoryCost() {
- bytes_loaded_ = 0;
- bytes_stored_ = 0;
- }
-
- // TODO(rmlarsen): Define min in terms of total cost, not elementwise.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(
- const TensorOpCost& rhs) const {
- double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded());
- double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored());
- double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
- return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
- }
-
- // TODO(rmlarsen): Define max in terms of total cost, not elementwise.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(
- const TensorOpCost& rhs) const {
- double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded());
- double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored());
- double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles());
- return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=(
- const TensorOpCost& rhs) {
- bytes_loaded_ += rhs.bytes_loaded();
- bytes_stored_ += rhs.bytes_stored();
- compute_cycles_ += rhs.compute_cycles();
- return *this;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) {
- bytes_loaded_ *= rhs;
- bytes_stored_ *= rhs;
- compute_cycles_ *= rhs;
- return *this;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+(
- TensorOpCost lhs, const TensorOpCost& rhs) {
- lhs += rhs;
- return lhs;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(
- TensorOpCost lhs, double rhs) {
- lhs *= rhs;
- return lhs;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(
- double lhs, TensorOpCost rhs) {
- rhs *= lhs;
- return rhs;
- }
-
- friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) {
- return os << "[bytes_loaded = " << tc.bytes_loaded()
- << ", bytes_stored = " << tc.bytes_stored()
- << ", compute_cycles = " << tc.compute_cycles() << "]";
- }
-
- private:
- double bytes_loaded_;
- double bytes_stored_;
- double compute_cycles_;
-};
-
-// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of theads
-// in [1:max_threads] instead of just switching multi-threading off for small
-// work units.
-template <typename Device>
-class TensorCostModel {
- public:
- // Scaling from Eigen compute cost to device cycles.
- static const int kDeviceCyclesPerComputeCycle = 1;
-
- // Costs in device cycles.
- static const int kStartupCycles = 100000;
- static const int kPerThreadCycles = 100000;
- static const int kTaskSize = 40000;
-
- // Returns the number of threads in [1:max_threads] to use for
- // evaluating an expression with the given output size and cost per
- // coefficient.
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads(
- double output_size, const TensorOpCost& cost_per_coeff, int max_threads) {
- double cost = totalCost(output_size, cost_per_coeff);
- int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9;
- return numext::mini(max_threads, numext::maxi(1, threads));
- }
-
- // taskSize assesses parallel task size.
- // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task
- // granularity needs to be increased to mitigate parallelization overheads.
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize(
- double output_size, const TensorOpCost& cost_per_coeff) {
- return totalCost(output_size, cost_per_coeff) / kTaskSize;
- }
-
- private:
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost(
- double output_size, const TensorOpCost& cost_per_coeff) {
- // Cost of memory fetches from L2 cache. 64 is typical cache line size.
- // 11 is L2 cache latency on Haswell.
- // We don't know whether data is in L1, L2 or L3. But we are most interested
- // in single-threaded computational time around 100us-10ms (smaller time
- // is too small for parallelization, larger time is not intersting
- // either because we are probably using all available threads already).
- // And for the target time range, L2 seems to be what matters. Data set
- // fitting into L1 is too small to take noticeable time. Data set fitting
- // only into L3 presumably will take more than 10ms to load and process.
- const double kLoadCycles = 1.0 / 64 * 11;
- const double kStoreCycles = 1.0 / 64 * 11;
- // Scaling from Eigen compute cost to device cycles.
- return output_size *
- cost_per_coeff.total_cost(kLoadCycles, kStoreCycles,
- kDeviceCyclesPerComputeCycle);
- }
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
deleted file mode 100644
index e020d07..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
+++ /dev/null
@@ -1,313 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
-
-namespace Eigen {
-
-/** \class TensorCustomUnaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor custom class.
- *
- *
- */
-namespace internal {
-template<typename CustomUnaryFunc, typename XprType>
-struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
-{
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::StorageKind StorageKind;
- typedef typename XprType::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = traits<XprType>::NumDimensions;
- static const int Layout = traits<XprType>::Layout;
-};
-
-template<typename CustomUnaryFunc, typename XprType>
-struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
-{
- typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
-};
-
-template<typename CustomUnaryFunc, typename XprType>
-struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
-{
- typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename CustomUnaryFunc, typename XprType>
-class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename internal::nested<TensorCustomUnaryOp>::type Nested;
- typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind;
- typedef typename internal::traits<TensorCustomUnaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func)
- : m_expr(expr), m_func(func) {}
-
- EIGEN_DEVICE_FUNC
- const CustomUnaryFunc& func() const { return m_func; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_expr; }
-
- protected:
- typename XprType::Nested m_expr;
- const CustomUnaryFunc m_func;
-};
-
-
-// Eval as rvalue
-template<typename CustomUnaryFunc, typename XprType, typename Device>
-struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
-{
- typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
- typedef typename internal::traits<ArgType>::Index Index;
- static const int NumDims = internal::traits<ArgType>::NumDimensions;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
- BlockAccess = false,
- Layout = TensorEvaluator<XprType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
- : m_op(op), m_device(device), m_result(NULL)
- {
- m_dimensions = op.func().dimensions(op.expression());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- if (data) {
- evalTo(data);
- return false;
- } else {
- m_result = static_cast<CoeffReturnType*>(
- m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
- evalTo(m_result);
- return true;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- if (m_result != NULL) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- return m_result[index];
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
- return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- // TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
-
- protected:
- EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
- TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
- data, m_dimensions);
- m_op.func().eval(m_op.expression(), result, m_device);
- }
-
- Dimensions m_dimensions;
- const ArgType m_op;
- const Device& m_device;
- CoeffReturnType* m_result;
-};
-
-
-
-/** \class TensorCustomBinaryOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor custom class.
- *
- *
- */
-namespace internal {
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
-{
- typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
- typename RhsXprType::Scalar>::ret Scalar;
- typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
- typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
- typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
- typename traits<RhsXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<LhsXprType>::Index,
- typename traits<RhsXprType>::Index>::type Index;
- typedef typename LhsXprType::Nested LhsNested;
- typedef typename RhsXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const int NumDimensions = traits<LhsXprType>::NumDimensions;
- static const int Layout = traits<LhsXprType>::Layout;
-};
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
-{
- typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
-};
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
-{
- typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
-class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType;
- typedef typename internal::nested<TensorCustomBinaryOp>::type Nested;
- typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind;
- typedef typename internal::traits<TensorCustomBinaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func)
-
- : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {}
-
- EIGEN_DEVICE_FUNC
- const CustomBinaryFunc& func() const { return m_func; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename LhsXprType::Nested>::type&
- lhsExpression() const { return m_lhs_xpr; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename RhsXprType::Nested>::type&
- rhsExpression() const { return m_rhs_xpr; }
-
- protected:
- typename LhsXprType::Nested m_lhs_xpr;
- typename RhsXprType::Nested m_rhs_xpr;
- const CustomBinaryFunc m_func;
-};
-
-
-// Eval as rvalue
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
-struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
-{
- typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
- typedef typename internal::traits<XprType>::Index Index;
- static const int NumDims = internal::traits<XprType>::NumDimensions;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
- BlockAccess = false,
- Layout = TensorEvaluator<LhsXprType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_op(op), m_device(device), m_result(NULL)
- {
- m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression());
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- if (data) {
- evalTo(data);
- return false;
- } else {
- m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
- evalTo(m_result);
- return true;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- if (m_result != NULL) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- return m_result[index];
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
- return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- // TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
-
- protected:
- EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
- TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
- m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
- }
-
- Dimensions m_dimensions;
- const XprType m_op;
- const Device& m_device;
- CoeffReturnType* m_result;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
deleted file mode 100644
index 29e50a3..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
-
-namespace Eigen {
-
-/** \class TensorDevice
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Pseudo expression providing an operator = that will evaluate its argument
- * on the specified computing 'device' (GPU, thread pool, ...)
- *
- * Example:
- * C.device(EIGEN_GPU) = A + B;
- *
- * Todo: operator *= and /=.
- */
-
-template <typename ExpressionType, typename DeviceType> class TensorDevice {
- public:
- TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
-
- template<typename OtherDerived>
- EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
- typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
- Assign assign(m_expression, other);
- internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
- typedef typename OtherDerived::Scalar Scalar;
- typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
- Sum sum(m_expression, other);
- typedef TensorAssignOp<ExpressionType, const Sum> Assign;
- Assign assign(m_expression, sum);
- internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
- typedef typename OtherDerived::Scalar Scalar;
- typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
- Difference difference(m_expression, other);
- typedef TensorAssignOp<ExpressionType, const Difference> Assign;
- Assign assign(m_expression, difference);
- internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
- return *this;
- }
-
- protected:
- const DeviceType& m_device;
- ExpressionType& m_expression;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
deleted file mode 100644
index 4f5767b..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ /dev/null
@@ -1,337 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H)
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
-
-namespace Eigen {
-
-static const int kCudaScratchSize = 1024;  // bytes of scratch space; CudaStreamDevice::scratchpad() allocates this plus one unsigned int
-
-// This defines an interface that GPUDevice can take to use
-// CUDA streams underneath.
-class StreamInterface {
- public:
- virtual ~StreamInterface() {}  // virtual so that implementations can be destroyed through this interface
-
- virtual const cudaStream_t& stream() const = 0;  // stream handed to the CUDA runtime calls issued by GpuDevice
- virtual const cudaDeviceProp& deviceProperties() const = 0;  // properties GpuDevice reads (multiprocessor count, thread limits, ...)
-
- // Allocate memory on the actual device where the computation will run
- virtual void* allocate(size_t num_bytes) const = 0;
- virtual void deallocate(void* buffer) const = 0;  // presumably only valid for buffers returned by allocate() -- TODO confirm
-
- // Return a scratchpad buffer of size 1k
- virtual void* scratchpad() const = 0;
-
- // Return a semaphore. The semaphore is initially initialized to 0, and
- // each kernel using it is responsible for resetting to 0 upon completion
- // to maintain the invariant that the semaphore is always equal to 0 upon
- // each kernel start.
- virtual unsigned int* semaphore() const = 0;
-};
-
-// Per-device property table indexed by CUDA device ordinal. It is allocated by
-// initializeDeviceProp() and is not released anywhere in this header.
-static cudaDeviceProp* m_deviceProperties;
-// Becomes true once every entry of m_deviceProperties has been filled in.
-static bool m_devicePropInitialized = false;
-
-// Queries the properties of all CUDA devices the first time it is called;
-// later calls return immediately. A thread that arrives while another one is
-// still filling the table polls m_devicePropInitialized until it is set.
-static void initializeDeviceProp() {
-  if (!m_devicePropInitialized) {
-    // Attempts to ensure proper behavior in the case of multiple threads
-    // calling this function simultaneously. This would be trivial to
-    // implement if we could use std::mutex, but unfortunately mutex don't
-    // compile with nvcc, so we resort to atomics and thread fences instead.
-    // Note that if the caller uses a compiler that doesn't support c++11 we
-    // can't ensure that the initialization is thread safe.
-    // NOTE(review): m_devicePropInitialized itself is a plain bool that is
-    // read and written by different threads; confirm this is acceptable.
-#if __cplusplus >= 201103L
-    static std::atomic<bool> first(true);
-    if (first.exchange(false)) {
-#else
-    static bool first = true;
-    if (first) {
-      first = false;
-#endif
-      // We're the first thread to reach this point.
-      // Start from zero: if the query below fails in a build where assert()
-      // is compiled out, the table must not be sized by an indeterminate
-      // value.
-      int num_devices = 0;
-      cudaError_t status = cudaGetDeviceCount(&num_devices);
-      if (status != cudaSuccess) {
-        std::cerr << "Failed to get the number of CUDA devices: "
-                  << cudaGetErrorString(status)
-                  << std::endl;
-        assert(status == cudaSuccess);
-        // Do not trust whatever the failed call may have written.
-        num_devices = 0;
-      }
-      m_deviceProperties = new cudaDeviceProp[num_devices];
-      for (int i = 0; i < num_devices; ++i) {
-        status = cudaGetDeviceProperties(&m_deviceProperties[i], i);
-        if (status != cudaSuccess) {
-          std::cerr << "Failed to initialize CUDA device #"
-                    << i
-                    << ": "
-                    << cudaGetErrorString(status)
-                    << std::endl;
-          assert(status == cudaSuccess);
-        }
-      }
-
-      // Publish the table before raising the flag.
-#if __cplusplus >= 201103L
-      std::atomic_thread_fence(std::memory_order_release);
-#endif
-      m_devicePropInitialized = true;
-    } else {
-      // Wait for the other thread to initialize the properties.
-      while (!m_devicePropInitialized) {
-#if __cplusplus >= 201103L
-        std::atomic_thread_fence(std::memory_order_acquire);
-#endif
-        sleep(1);
-      }
-    }
-  }
-}
-
-static const cudaStream_t default_stream = cudaStreamDefault;  // addressable copy of the default stream; CudaStreamDevice points at it when no stream is supplied
-
-// StreamInterface implementation tied to one CUDA device ordinal and one
-// stream. The scratchpad is allocated on first request, the semaphore lives
-// in the unsigned int that follows it, and the destructor releases both.
-class CudaStreamDevice : public StreamInterface {
- public:
-  // Default stream, device currently selected in the CUDA runtime.
-  CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
-    cudaGetDevice(&device_);
-    initializeDeviceProp();
-  }
-  // Default stream, explicitly chosen device.
-  CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {
-    initializeDeviceProp();
-  }
-  // Caller-provided stream. The caller must make sure the stream can run on
-  // the given device; a negative device means "whatever device is current".
-  CudaStreamDevice(const cudaStream_t* stream, int device = -1)
-      : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) {
-    if (device >= 0) {
-      int device_count;
-      cudaError_t status = cudaGetDeviceCount(&device_count);
-      EIGEN_UNUSED_VARIABLE(status)
-      assert(status == cudaSuccess);
-      assert(device < device_count);
-      device_ = device;
-    } else {
-      cudaGetDevice(&device_);
-    }
-    initializeDeviceProp();
-  }
-
-  virtual ~CudaStreamDevice() {
-    // The semaphore shares the scratchpad allocation, so one free is enough.
-    if (scratch_) deallocate(scratch_);
-  }
-
-  const cudaStream_t& stream() const { return *stream_; }
-  const cudaDeviceProp& deviceProperties() const { return m_deviceProperties[device_]; }
-
-  // cudaMalloc on this object's device.
-  virtual void* allocate(size_t num_bytes) const {
-    cudaError_t status = cudaSetDevice(device_);
-    EIGEN_UNUSED_VARIABLE(status)
-    assert(status == cudaSuccess);
-    void* buffer;
-    status = cudaMalloc(&buffer, num_bytes);
-    assert(status == cudaSuccess);
-    assert(buffer != NULL);
-    return buffer;
-  }
-  // cudaFree on this object's device.
-  virtual void deallocate(void* buffer) const {
-    cudaError_t status = cudaSetDevice(device_);
-    EIGEN_UNUSED_VARIABLE(status)
-    assert(status == cudaSuccess);
-    assert(buffer != NULL);
-    status = cudaFree(buffer);
-    assert(status == cudaSuccess);
-  }
-
-  // Lazily allocated buffer of kCudaScratchSize bytes followed by room for
-  // the semaphore.
-  virtual void* scratchpad() const {
-    if (scratch_ != NULL) return scratch_;
-    scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int));
-    return scratch_;
-  }
-
-  // Lazily set up counter placed right after the scratchpad bytes; it is
-  // zeroed asynchronously on the stream the first time it is requested.
-  virtual unsigned int* semaphore() const {
-    if (semaphore_ != NULL) return semaphore_;
-    semaphore_ = reinterpret_cast<unsigned int*>(
-        static_cast<char*>(scratchpad()) + kCudaScratchSize);
-    cudaError_t status = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_);
-    EIGEN_UNUSED_VARIABLE(status)
-    assert(status == cudaSuccess);
-    return semaphore_;
-  }
-
- private:
-  const cudaStream_t* stream_;
-  int device_;
-  mutable void* scratch_;
-  mutable unsigned int* semaphore_;
-};
-
-struct GpuDevice {  // forwards stream, memory and device-property queries to a StreamInterface
- // The StreamInterface is not owned: the caller is
- // responsible for its initialization and eventual destruction.
- explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
- eigen_assert(stream);
- }
- explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
- eigen_assert(stream);
- }
- // TODO(bsteiner): This is an internal API, we should not expose it.
- EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
- return stream_->stream();
- }
-
- EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
- return stream_->allocate(num_bytes);
- }
-
- EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
- stream_->deallocate(buffer);
- }
-
- EIGEN_STRONG_INLINE void* scratchpad() const {
- return stream_->scratchpad();
- }
-
- EIGEN_STRONG_INLINE unsigned int* semaphore() const {
- return stream_->semaphore();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-#ifndef __CUDA_ARCH__  // host pass: enqueue a device-to-device copy on the stream
- cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice,
- stream_->stream());
- EIGEN_UNUSED_VARIABLE(err)
- assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
- }
-
- EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {  // enqueued with cudaMemcpyAsync on the stream
- cudaError_t err =
- cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
- EIGEN_UNUSED_VARIABLE(err)
- assert(err == cudaSuccess);
- }
-
- EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {  // enqueued with cudaMemcpyAsync on the stream
- cudaError_t err =
- cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
- EIGEN_UNUSED_VARIABLE(err)
- assert(err == cudaSuccess);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-#ifndef __CUDA_ARCH__  // host pass: enqueue the fill on the stream
- cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream());
- EIGEN_UNUSED_VARIABLE(err)
- assert(err == cudaSuccess);
-#else
- eigen_assert(false && "The default device should be used instead to generate kernel code");
-#endif
- }
-
- EIGEN_STRONG_INLINE size_t numThreads() const {
- // FIXME: hard-coded value, not read from the device properties
- return 32;
- }
-
- EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
- // FIXME: hard-coded value, not read from the device properties
- return 48*1024;
- }
-
- EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
- // We won't try to take advantage of the l2 cache for the time being, and
- // there is no l3 cache on cuda devices.
- return firstLevelCacheSize();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
-#if defined(__CUDACC__) && !defined(__CUDA_ARCH__)  // host code built by a CUDA compiler: block until the stream has drained
- cudaError_t err = cudaStreamSynchronize(stream_->stream());
- if (err != cudaSuccess) {
- std::cerr << "Error detected in CUDA stream: "
- << cudaGetErrorString(err)
- << std::endl;
- assert(err == cudaSuccess);
- }
-#else
- assert(false && "The default device should be used instead to generate kernel code");
-#endif
- }
-
- EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
- return stream_->deviceProperties().multiProcessorCount;
- }
- EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
- return stream_->deviceProperties().maxThreadsPerBlock;
- }
- EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
- return stream_->deviceProperties().maxThreadsPerMultiProcessor;
- }
- EIGEN_STRONG_INLINE int sharedMemPerBlock() const {  // NOTE(review): result is converted to int; confirm the property cannot exceed INT_MAX
- return stream_->deviceProperties().sharedMemPerBlock;
- }
- EIGEN_STRONG_INLINE int majorDeviceVersion() const {
- return stream_->deviceProperties().major;
- }
- EIGEN_STRONG_INLINE int minorDeviceVersion() const {
- return stream_->deviceProperties().minor;
- }
-
- EIGEN_STRONG_INLINE int maxBlocks() const {  // value given at construction, INT_MAX when none was given
- return max_blocks_;
- }
-
- // This function checks if the CUDA runtime recorded an error for the
- // underlying stream device.
- inline bool ok() const {
-#ifdef __CUDACC__
- cudaError_t error = cudaStreamQuery(stream_->stream());
- return (error == cudaSuccess) || (error == cudaErrorNotReady);  // pending work is not treated as an error
-#else  // not built by a CUDA compiler: always reports failure
- return false;
-#endif
- }
-
- private:
- const StreamInterface* stream_;
- int max_blocks_;
-};
-
-#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \
- (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
- assert(cudaGetLastError() == cudaSuccess);  // NOTE(review): the macro expands to two statements, so as the body of an unbraced if/else only the launch is guarded; the whole check, including the cudaGetLastError() call, disappears under NDEBUG
-
-
-// FIXME: Should be device and kernel specific.
-#ifdef __CUDACC__
-// Applies `config` through cudaDeviceSetSharedMemConfig() when compiled for
-// the host; in device code the argument is only marked as unused.
-static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
-#ifdef __CUDA_ARCH__
-  EIGEN_UNUSED_VARIABLE(config)
-#else
-  cudaError_t result = cudaDeviceSetSharedMemConfig(config);
-  EIGEN_UNUSED_VARIABLE(result)
-  assert(result == cudaSuccess);
-#endif
-}
-#endif
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
deleted file mode 100644
index 9d14139..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ /dev/null
@@ -1,81 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
-
-
-namespace Eigen {
-
-// Default device for the machine (typically a single cpu core)
-struct DefaultDevice {
-  // --- Memory: aligned heap allocations and the C library copy/fill routines.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    return internal::aligned_malloc(num_bytes);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    internal::aligned_free(buffer);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-    ::memcpy(dst, src, n);
-  }
-  // "Host" and "device" memory are handled the same way here, so both
-  // directions go through the member memcpy() above.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
-    this->memcpy(dst, src, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
-    this->memcpy(dst, src, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
-    ::memset(buffer, c, n);
-  }
-
-  // --- Hardware description. Each query has a CUDA device-code answer and a
-  // host answer, selected at compile time through __CUDA_ARCH__.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
-#ifdef __CUDA_ARCH__
-    // Compiled as CUDA device code.
-    return 32;
-#else
-    // Compiled for the host.
-    return 1;
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-#ifdef __CUDA_ARCH__
-    // CUDA device code: report the amount of shared memory available.
-    return 48*1024;
-#else
-    // Host: first level cache of the CPU.
-    return l1CacheSize();
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-#ifdef __CUDA_ARCH__
-    // CUDA device code: same figure as the first level.
-    return firstLevelCacheSize();
-#else
-    // Host, single threaded: third level cache of the CPU.
-    return l3CacheSize();
-#endif
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-#ifdef __CUDA_ARCH__
-    // CUDA device code: major version derived from the architecture macro.
-    return __CUDA_ARCH__ / 100;
-#else
-    // Host, single threaded.
-    // Should return an enum that encodes the ISA supported by the CPU
-    return 1;
-#endif
-  }
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
deleted file mode 100644
index 7c03989..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ /dev/null
@@ -1,122 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
-
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
-
-namespace Eigen {
-// Device that evaluates tensor expressions through a SYCL queue. Host
-// pointers act as keys of buffer_map, which owns one SYCL buffer per pointer.
-struct SyclDevice {
-  /// class members
-  /// sycl queue
-  mutable cl::sycl::queue m_queue;
-  /// std::map is the container used to make sure that we create only one buffer
-  /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice.
-  /// If a non-read-only pointer is needed to be accessed on the host we should manually deallocate it.
-  mutable std::map<const void *, std::shared_ptr<void>> buffer_map;
-  /// creating device by using selector
-  template<typename dev_Selector> SyclDevice(dev_Selector s)
-  :
-#ifdef EIGEN_EXCEPTIONS
-  m_queue(cl::sycl::queue(s, [=](cl::sycl::exception_list l) {
-    for (const auto& e : l) {
-      try {
-        std::rethrow_exception(e);
-      } catch (cl::sycl::exception& sycl_error) {
-        // Caught by reference so the exception is neither copied nor sliced.
-        std::cout << sycl_error.what() << std::endl;
-      }
-    }
-  }))
-#else
-  m_queue(cl::sycl::queue(s))
-#endif
-  {}
-  // destructor
-  ~SyclDevice() { deallocate_all(); }
-
-  /// Drops the buffer registered for p, if there is one, and frees p.
-  /// Pointers that are not registered are left untouched.
-  template <typename T> void deallocate(T *p) const {
-    auto it = buffer_map.find(p);
-    if (it != buffer_map.end()) {
-      buffer_map.erase(it);
-      internal::aligned_free(p);
-    }
-  }
-  /// Drops every registered buffer and frees every key pointer. As before,
-  /// each buffer is released before its key pointer is freed.
-  /// NOTE(review): every key is passed to aligned_free, including pointers
-  /// registered through add_sycl_buffer; confirm they all come from allocate().
-  void deallocate_all() const {
-    for (auto it = buffer_map.begin(); it != buffer_map.end();) {
-      void *host_ptr = const_cast<void*>(it->first);
-      it = buffer_map.erase(it);
-      internal::aligned_free(host_ptr);
-    }
-  }
-
-  /// creation of sycl accessor for a buffer. This function first tries to find
-  /// the buffer in the buffer_map. If found it gets the accessor from it, if not,
-  /// the function then adds an entry by creating a sycl buffer for that particular pointer.
-  template <cl::sycl::access::mode AcMd, typename T> inline cl::sycl::accessor<T, 1, AcMd, cl::sycl::access::target::global_buffer>
-  get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const T * ptr) const {
-    return (get_sycl_buffer<T>(num_bytes, ptr)->template get_access<AcMd, cl::sycl::access::target::global_buffer>(cgh));
-  }
-
-  /// Registers a buffer for ptr unless one already exists. The returned pair
-  /// is the result of std::map::insert: the entry for ptr and whether it was
-  /// newly created. The buffer is told not to copy data back on destruction.
-  template<typename T> inline std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> add_sycl_buffer(const T *ptr, size_t num_bytes) const {
-    using Type = cl::sycl::buffer<T, 1>;
-    std::pair<std::map<const void *, std::shared_ptr<void>>::iterator,bool> ret = buffer_map.insert(std::pair<const void *, std::shared_ptr<void>>(ptr, std::shared_ptr<void>(new Type(cl::sycl::range<1>(num_bytes)),
-      [](void *dataMem) { delete static_cast<Type*>(dataMem); })));
-    // ret.first designates the entry for ptr whether or not it was just inserted.
-    (static_cast<Type*>(ret.first->second.get()))->set_final_data(nullptr);
-    return ret;
-  }
-
-  /// Returns the buffer registered for ptr, creating it first if needed.
-  template <typename T> inline cl::sycl::buffer<T, 1>* get_sycl_buffer(size_t num_bytes,const T * ptr) const {
-    return static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(ptr, num_bytes).first->second.get());
-  }
-
-  /// allocating memory on the cpu
-  /// NOTE(review): the requested size is ignored and 8 bytes are allocated;
-  /// the result looks like it is only meant to be a unique key for buffer_map,
-  /// yet memcpy() and memset() below write through such pointers -- confirm.
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t) const {
-    return internal::aligned_malloc(8);
-  }
-
-  // some runtime conditions that can be applied here
-  bool isDeviceSuitable() const { return true; }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const {
-    ::memcpy(dst, src, n);
-  }
-
-  /// Copies n bytes from src into the buffer registered for dst (created on
-  /// demand) through a host accessor.
-  template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
-    auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
-    memcpy(host_acc.get_pointer(), src, n);
-  }
-  /// with the current implementation of sycl, the data is copied twice from device to host. This will be fixed soon.
-  template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const {
-    auto it = buffer_map.find(src);
-    if (it != buffer_map.end()) {
-      auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(it->second.get()))-> template get_access<cl::sycl::access::mode::read, cl::sycl::access::target::host_buffer>();
-      memcpy(dst,host_acc.get_pointer(), n);
-    } else{
-      // A bare string literal is always true, so the assertion must be forced
-      // to fail explicitly for the missing-buffer case to be reported.
-      eigen_assert(false && "no device memory found. The memory might be destroyed before creation");
-    }
-  }
-
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, size_t n) const {
-    ::memset(buffer, c, n);
-  }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
-    return 1;
-  }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
deleted file mode 100644
index 17f0466..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ /dev/null
@@ -1,282 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H)
-#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H
-
-namespace Eigen {
-
-// Use the non blocking thread pool by default. Define
-// EIGEN_USE_SIMPLE_THREAD_POOL to select the SimpleThreadPool instead.
-#ifndef EIGEN_USE_SIMPLE_THREAD_POOL
-template <typename Env> using ThreadPoolTempl = NonBlockingThreadPoolTempl<Env>;
-typedef NonBlockingThreadPool ThreadPool;
-#else
-template <typename Env> using ThreadPoolTempl = SimpleThreadPoolTempl<Env>;
-typedef SimpleThreadPool ThreadPool;
-#endif
-
-
-// Barrier is an object that allows one or more threads to wait until
-// Notify has been called a specified number of times.
-class Barrier {
- public:
- Barrier(unsigned int count) : state_(count << 1), notified_(false) {  // count is stored above bit 0, which is kept for the waiter flag
- eigen_assert(((count << 1) >> 1) == count);  // count must survive the shift, i.e. its top bit must be clear
- }
- ~Barrier() {
- eigen_assert((state_>>1) == 0);  // every expected Notify() must have happened before destruction
- }
-
- void Notify() {
- unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;  // subtracting 2 decrements the count and leaves the waiter flag alone; v is the new state
- if (v != 1) {  // v == 1 means: count reached 0 and the waiter flag is set
- eigen_assert(((v + 2) & ~1) != 0);  // the count was non-zero before this call
- return; // either count has not dropped to 0, or waiter is not waiting
- }
- std::unique_lock<std::mutex> l(mu_);
- eigen_assert(!notified_);
- notified_ = true;
- cv_.notify_all();
- }
-
- void Wait() {
- unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);  // set the waiter flag; v is the state before the flag was set
- if ((v >> 1) == 0) return;  // count already at 0: nothing to wait for
- std::unique_lock<std::mutex> l(mu_);
- while (!notified_) {  // loop guards against spurious wake-ups
- cv_.wait(l);
- }
- }
-
- private:
- std::mutex mu_;  // protects notified_
- std::condition_variable cv_;
- std::atomic<unsigned int> state_; // low bit is waiter flag
- bool notified_;
-};
-
-
-// Notification is an object that allows a user to wait for another
-// thread to signal a notification that an event has occurred.
-//
-// Multiple threads can wait on the same Notification object,
-// but only one caller must call Notify() on the object.
-//
-// It is implemented as a Barrier whose count is one.
-struct Notification : Barrier {
-  Notification() : Barrier(1) {}
-};
-
-
-// Runs an arbitrary function and then calls Notify() on the passed in
-// Notification.
-// Adapter used when scheduling work: invokes f(args...) and afterwards
-// signals the Notification, unless the pointer is null.
-template <typename Function, typename... Args>
-struct FunctionWrapperWithNotification {
-  static void run(Notification* n, Function f, Args... args) {
-    f(args...);
-    if (!n) return;
-    n->Notify();
-  }
-};
-
-// Adapter used when scheduling work: invokes f(args...) and afterwards
-// notifies the Barrier, unless the pointer is null.
-template <typename Function, typename... Args>
-struct FunctionWrapperWithBarrier {
-  static void run(Barrier* b, Function f, Args... args) {
-    f(args...);
-    if (!b) return;
-    b->Notify();
-  }
-};
-
-// Blocks on n->Wait(); a null pointer means there is nothing to wait for.
-template <typename SyncType>
-static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
-  if (!n) return;
-  n->Wait();
-}
-
-
-// Build a thread pool device on top of an existing pool of threads.
-struct ThreadPoolDevice {
- // The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
-
- EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
- return internal::aligned_malloc(num_bytes);
- }
-
- EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
- internal::aligned_free(buffer);
- }
-
- EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
- ::memcpy(dst, src, n);
- }
- EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
- EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
- memcpy(dst, src, n);
- }
-
- EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
- ::memset(buffer, c, n);
- }
-
- EIGEN_STRONG_INLINE int numThreads() const {
- return num_threads_;
- }
-
- EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
- return l1CacheSize();
- }
-
- EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
- // The l3 cache size is shared between all the cores.
- return l3CacheSize() / num_threads_;  // NOTE(review): divides by zero if the device was built with num_cores == 0
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
- // Should return an enum that encodes the ISA supported by the CPU
- return 1;
- }
-
- template <class Function, class... Args>
- EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
- Notification* n = new Notification();  // not deleted here; presumably the caller must delete it once done waiting -- TODO confirm
- pool_->Schedule(std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...));
- return n;
- }
-
- template <class Function, class... Args>
- EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b,
- Function&& f,
- Args&&... args) const {
- pool_->Schedule(std::bind(
- &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...));
- }
-
- template <class Function, class... Args>
- EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const {
- pool_->Schedule(std::bind(f, args...));
- }
-
- // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if
- // called from one of the threads in pool_. Returns -1 otherwise.
- EIGEN_STRONG_INLINE int currentThreadId() const {
- return pool_->CurrentThreadId();
- }
-
- // parallelFor executes f with [0, n) arguments in parallel and waits for
- // completion. F accepts a half-open interval [first, last).
- // Block size is chosen based on the iteration cost and resulting parallel
- // efficiency. If block_align is not nullptr, it is called to round up the
- // block size.
- void parallelFor(Index n, const TensorOpCost& cost,
- std::function<Index(Index)> block_align,
- std::function<void(Index, Index)> f) const {
- typedef TensorCostModel<ThreadPoolDevice> CostModel;
- if (n <= 1 || numThreads() == 1 ||
- CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) {
- f(0, n);  // a single thread is enough: run the whole range inline on the calling thread
- return;
- }
-
- // Calculate block size based on (1) the iteration cost and (2) parallel
- // efficiency. We want blocks to be not too small to mitigate
- // parallelization overheads; not too large to mitigate tail
- // effect and potential load imbalance and we also want number
- // of blocks to be evenly dividable across threads.
-
- double block_size_f = 1.0 / CostModel::taskSize(1, cost);
- const Index max_oversharding_factor = 4;
- Index block_size = numext::mini(
- n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()),
- block_size_f));
- const Index max_block_size = numext::mini(n, 2 * block_size);  // coarsening below never goes past twice the initial block size
- if (block_align) {
- Index new_block_size = block_align(block_size);
- eigen_assert(new_block_size >= block_size);
- block_size = numext::mini(n, new_block_size);
- }
- Index block_count = divup(n, block_size);
- // Calculate parallel efficiency as fraction of total CPU time used for
- // computations:
- double max_efficiency =
- static_cast<double>(block_count) /
- (divup<int>(block_count, numThreads()) * numThreads());
- // Now try to increase block size up to max_block_size as long as it
- // doesn't decrease parallel efficiency.
- for (Index prev_block_count = block_count;
- max_efficiency < 1.0 && prev_block_count > 1;) {
- // This is the next block size that divides size into a smaller number
- // of blocks than the current block_size.
- Index coarser_block_size = divup(n, prev_block_count - 1);
- if (block_align) {
- Index new_block_size = block_align(coarser_block_size);
- eigen_assert(new_block_size >= coarser_block_size);
- coarser_block_size = numext::mini(n, new_block_size);
- }
- if (coarser_block_size > max_block_size) {
- break; // Reached max block size. Stop.
- }
- // Recalculate parallel efficiency.
- const Index coarser_block_count = divup(n, coarser_block_size);
- eigen_assert(coarser_block_count < prev_block_count);
- prev_block_count = coarser_block_count;
- const double coarser_efficiency =
- static_cast<double>(coarser_block_count) /
- (divup<int>(coarser_block_count, numThreads()) * numThreads());
- if (coarser_efficiency + 0.01 >= max_efficiency) {  // accept a coarser split unless it loses more than 0.01 of efficiency
- // Taking it.
- block_size = coarser_block_size;
- block_count = coarser_block_count;
- if (max_efficiency < coarser_efficiency) {
- max_efficiency = coarser_efficiency;
- }
- }
- }
-
- // Recursively divide size into halves until we reach block_size.
- // Division code rounds mid to block_size, so we are guaranteed to get
- // block_count leaves that do actual computations.
- Barrier barrier(static_cast<unsigned int>(block_count));
- std::function<void(Index, Index)> handleRange;
- handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {  // the by-reference captures are locals of this call; barrier.Wait() below blocks until every leaf has notified
- if (last - first <= block_size) {
- // Single block or less, execute directly.
- f(first, last);
- barrier.Notify();
- return;
- }
- // Split into halves and submit to the pool.
- Index mid = first + divup((last - first) / 2, block_size) * block_size;
- pool_->Schedule([=, &handleRange]() { handleRange(mid, last); });
- pool_->Schedule([=, &handleRange]() { handleRange(first, mid); });
- };
- handleRange(0, n);
- barrier.Wait();
- }
-
- // Convenience wrapper for parallelFor that does not align blocks.
- void parallelFor(Index n, const TensorOpCost& cost,
- std::function<void(Index, Index)> f) const {
- parallelFor(n, cost, nullptr, std::move(f));
- }
-
- private:
- ThreadPoolInterface* pool_;
- int num_threads_;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
deleted file mode 100644
index 1a30e45..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
+++ /dev/null
@@ -1,236 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
-
-namespace Eigen {
-
-/** \internal
- *
- * \class TensorDimensionList
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n.
- *
- * \sa Tensor
- */
-
-template <typename Index, std::size_t Rank> struct DimensionList {
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- const Index operator[] (const Index i) const { return i; }
-};
-
-namespace internal {
-
-template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > {
- static const size_t value = Rank;
-};
-template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > {
- static const size_t value = Rank;
-};
-
-template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>&) {
- return n;
-}
-template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>&) {
- return n;
-}
-
-
-#if EIGEN_HAS_CONSTEXPR
-template <typename Index, std::size_t Rank>
-struct index_known_statically_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_known_statically_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_eq_impl<DimensionList<Index, Rank> > {
- static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i == value;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_eq_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i == value;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_ne_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i != value;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_ne_impl<const DimensionList<Index, Rank> > {
- static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i != value;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_gt_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i > value;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_gt_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i > value;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_lt_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i < value;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return i < value;
- }
-};
-
-#else
-template <typename Index, std::size_t Rank>
-struct index_known_statically_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_known_statically_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically_impl<DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct all_indices_known_statically_impl<const DimensionList<Index, Rank> > {
- EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase_impl<DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
- return true;
- }
-};
-template <typename Index, std::size_t Rank>
-struct indices_statically_known_to_increase_impl<const DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
- return true;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_eq_impl<DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_eq_impl<const DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_ne_impl<DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){
- return false;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_ne_impl<const DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_gt_impl<DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_gt_impl<const DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-
-template <typename Index, std::size_t Rank>
-struct index_statically_lt_impl<DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-template <typename Index, std::size_t Rank>
-struct index_statically_lt_impl<const DimensionList<Index, Rank> > {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) {
- return false;
- }
-};
-#endif
-
-} // end namespace internal
-} // end namespace Eigen
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
deleted file mode 100644
index 451940d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
+++ /dev/null
@@ -1,428 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
-
-
-namespace Eigen {
-
-/** \internal
- *
- * \class TensorDimensions
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Set of classes used to encode and store the dimensions of a Tensor.
- *
- * The Sizes class encodes as part of the type the number of dimensions and the
- * sizes corresponding to each dimension. It uses no storage space since it is
- * entirely known at compile time.
- * The DSizes class is its dynamic sibling: the number of dimensions is known
- * at compile time but the sizes are set during execution.
- *
- * \sa Tensor
- */
-
-// Boilerplate code
-namespace internal {
-
-template<std::size_t n, typename Dimension> struct dget {
- static const std::size_t value = get<n, Dimension>::value;
-};
-
-
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct fixed_size_tensor_index_linearization_helper
-{
- template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(array<Index, NumIndices> const& indices,
- const Dimensions& dimensions)
- {
- return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) +
- dget<RowMajor ? n - 1 : (NumIndices - n), Dimensions>::value *
- fixed_size_tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
- }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
- template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(array<Index, NumIndices> const&, const Dimensions&)
- {
- return 0;
- }
-};
-
-template<typename Index, std::size_t n>
-struct fixed_size_tensor_index_extraction_helper
-{
- template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(const Index index,
- const Dimensions& dimensions)
- {
- const Index mult = (index == n-1) ? 1 : 0;
- return array_get<n-1>(dimensions) * mult +
- fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions);
- }
-};
-
-template<typename Index>
-struct fixed_size_tensor_index_extraction_helper<Index, 0>
-{
- template <typename Dimensions> EIGEN_DEVICE_FUNC
- static inline Index run(const Index,
- const Dimensions&)
- {
- return 0;
- }
- };
-
-} // end namespace internal
-
-
-// Fixed size
-#ifndef EIGEN_EMULATE_CXX11_META_H
-template <typename std::ptrdiff_t... Indices>
-struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
- typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
- static const std::ptrdiff_t total_size = internal::arg_prod(Indices...);
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const {
- return Base::count;
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() {
- return internal::arg_prod(Indices...);
- }
-
- EIGEN_DEVICE_FUNC Sizes() { }
- template <typename DenseIndex>
- explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
- // todo: add assertion
- }
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
- explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
- // todo: add assertion
- }
-#endif
-
- template <typename T> Sizes& operator = (const T& /*other*/) {
- // add assertion failure if the size of other is different
- return *this;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const {
- return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, *this);
- }
-
- template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
- return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *static_cast<const Base*>(this));
- }
- template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
- return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *static_cast<const Base*>(this));
- }
-};
-
-namespace internal {
-template <typename std::ptrdiff_t... Indices>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indices...>&) {
- return Sizes<Indices...>::total_size;
-}
-}
-
-#else
-
-template <std::size_t n>
-struct non_zero_size {
- typedef internal::type2val<std::size_t, n> type;
-};
-template <>
-struct non_zero_size<0> {
- typedef internal::null_type type;
-};
-
-template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes {
- typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base;
- static const size_t count = Base::count;
- static const std::size_t total_size = internal::arg_prod<Base>::value;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
- return count;
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() {
- return internal::arg_prod<Base>::value;
- }
-
- Sizes() { }
- template <typename DenseIndex>
- explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
- // todo: add assertion
- }
- template <typename T> Sizes& operator = (const T& /*other*/) {
- // add assertion failure if the size of other is different
- return *this;
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
- explicit Sizes(std::initializer_list<std::size_t>) {
- // todo: add assertion
- }
-#else
- EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
- }
- EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) {
- }
- EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
- }
- EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
- }
- EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) {
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index operator[] (const Index index) const {
- switch (index) {
- case 0:
- return internal::get<0, Base>::value;
- case 1:
- return internal::get<1, Base>::value;
- case 2:
- return internal::get<2, Base>::value;
- case 3:
- return internal::get<3, Base>::value;
- case 4:
- return internal::get<4, Base>::value;
- default:
- eigen_assert(false && "index overflow");
- return static_cast<Index>(-1);
- }
- }
-
- template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
- return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this));
- }
- template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
- return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this));
- }
-};
-
-namespace internal {
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
- return Sizes<V1, V2, V3, V4, V5>::total_size;
-}
-}
-
-#endif
-
-// Boilerplate
-namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct tensor_index_linearization_helper
-{
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const& dimensions)
- {
- return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) +
- array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) *
- tensor_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
- }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Index run(array<Index, NumIndices> const& indices, array<Index, NumIndices> const&)
- {
- return array_get<RowMajor ? 0 : NumIndices - 1>(indices);
- }
-};
-} // end namespace internal
-
-
-
-// Dynamic size
-template <typename DenseIndex, int NumDims>
-struct DSizes : array<DenseIndex, NumDims> {
- typedef array<DenseIndex, NumDims> Base;
- static const int count = NumDims;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const {
- return NumDims;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const {
- return (NumDims == 0) ? 1 : internal::array_prod(*static_cast<const Base*>(this));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() {
- for (int i = 0 ; i < NumDims; ++i) {
- (*this)[i] = 0;
- }
- }
- EIGEN_DEVICE_FUNC explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { }
-
- EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) {
- eigen_assert(NumDims == 1);
- (*this)[0] = i0;
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) {
- EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-#else
- EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) {
- eigen_assert(NumDims == 2);
- (*this)[0] = i0;
- (*this)[1] = i1;
- }
- EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) {
- eigen_assert(NumDims == 3);
- (*this)[0] = i0;
- (*this)[1] = i1;
- (*this)[2] = i2;
- }
- EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) {
- eigen_assert(NumDims == 4);
- (*this)[0] = i0;
- (*this)[1] = i1;
- (*this)[2] = i2;
- (*this)[3] = i3;
- }
- EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) {
- eigen_assert(NumDims == 5);
- (*this)[0] = i0;
- (*this)[1] = i1;
- (*this)[2] = i2;
- (*this)[3] = i3;
- (*this)[4] = i4;
- }
-#endif
-
- EIGEN_DEVICE_FUNC DSizes& operator = (const array<DenseIndex, NumDims>& other) {
- *static_cast<Base*>(this) = other;
- return *this;
- }
-
- // A constexpr would be so much better here
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array<DenseIndex, NumDims>& indices) const {
- return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, false>::run(indices, *static_cast<const Base*>(this));
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
- return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
- }
-};
-
-
-
-
-// Boilerplate
-namespace internal {
-template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
-struct tensor_vsize_index_linearization_helper
-{
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const& dimensions)
- {
- return array_get<RowMajor ? n : (NumIndices - n - 1)>(indices) +
- array_get<RowMajor ? n : (NumIndices - n - 1)>(dimensions) *
- tensor_vsize_index_linearization_helper<Index, NumIndices, n - 1, RowMajor>::run(indices, dimensions);
- }
-};
-
-template<typename Index, std::size_t NumIndices, bool RowMajor>
-struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
-{
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Index run(array<Index, NumIndices> const& indices, std::vector<DenseIndex> const&)
- {
- return array_get<RowMajor ? 0 : NumIndices - 1>(indices);
- }
-};
-} // end namespace internal
-
-
-namespace internal {
-
-template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
- static const size_t value = NumDims;
-};
-template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > {
- static const size_t value = NumDims;
-};
-#ifndef EIGEN_EMULATE_CXX11_META_H
-template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > {
-static const std::ptrdiff_t value = Sizes<Indices...>::count;
-};
-template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...> > {
-static const std::ptrdiff_t value = Sizes<Indices...>::count;
-};
-template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) {
- return get<n, internal::numeric_list<std::size_t, Indices...> >::value;
-}
-template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) {
- eigen_assert(false && "should never be called");
- return -1;
-}
-#else
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
- static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
-};
-template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
- static const size_t value = Sizes<V1,V2,V3,V4,V5>::count;
-};
-template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
- return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
-}
-
-#endif
-
-
-template <typename Dims1, typename Dims2, size_t n, size_t m>
-struct sizes_match_below_dim {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
- return false;
- }
-};
-template <typename Dims1, typename Dims2, size_t n>
-struct sizes_match_below_dim<Dims1, Dims2, n, n> {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) {
- return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
- sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
- }
-};
-template <typename Dims1, typename Dims2>
-struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
- static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
- return true;
- }
-};
-
-} // end namespace internal
-
-
-template <typename Dims1, typename Dims2>
-EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) {
- return internal::sizes_match_below_dim<Dims1, Dims2, internal::array_size<Dims1>::value, internal::array_size<Dims2>::value>::run(dims1, dims2);
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
deleted file mode 100644
index 0698713..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
-
-namespace Eigen {
-
-/** \class TensorForcedEval
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor reshaping class.
- *
- *
- */
-namespace internal {
-template<typename XprType, template <class> class MakePointer_>
-struct traits<TensorEvalToOp<XprType, MakePointer_> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-
- enum {
- Flags = 0
- };
- template <class T>
- struct MakePointer {
- // Intermediate typedef to workaround MSVC issue.
- typedef MakePointer_<T> MakePointerT;
- typedef typename MakePointerT::Type Type;
- };
-};
-
-template<typename XprType, template <class> class MakePointer_>
-struct eval<TensorEvalToOp<XprType, MakePointer_>, Eigen::Dense>
-{
- typedef const TensorEvalToOp<XprType, MakePointer_>& type;
-};
-
-template<typename XprType, template <class> class MakePointer_>
-struct nested<TensorEvalToOp<XprType, MakePointer_>, 1, typename eval<TensorEvalToOp<XprType, MakePointer_> >::type>
-{
- typedef TensorEvalToOp<XprType, MakePointer_> type;
-};
-
-} // end namespace internal
-
-
-
-
-template<typename XprType, template <class> class MakePointer_>
-class TensorEvalToOp : public TensorBase<TensorEvalToOp<XprType, MakePointer_>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorEvalToOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename MakePointer_<CoeffReturnType>::Type PointerType;
- typedef typename Eigen::internal::nested<TensorEvalToOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorEvalToOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorEvalToOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr)
- : m_xpr(expr), m_buffer(buffer) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; }
-
- protected:
- typename XprType::Nested m_xpr;
- PointerType m_buffer;
-};
-
-
-
-template<typename ArgType, typename Device, template <class> class MakePointer_>
-struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
-{
- typedef TensorEvalToOp<ArgType, MakePointer_> XprType;
- typedef typename ArgType::Scalar Scalar;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
- typedef typename XprType::Index Index;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = true
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device),
- m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
- { }
-
- // Used for accessor extraction in SYCL Managed TensorMap:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
- return m_op;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
- }
-
- typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer;
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) {
- EIGEN_UNUSED_VARIABLE(scalar);
- eigen_assert(scalar == NULL);
- return m_impl.evalSubExprsIfNeeded(m_buffer);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
- m_buffer[i] = m_impl.coeff(i);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
- internal::pstoret<CoeffReturnType, PacketReturnType, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_buffer[index];
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- // We assume that evalPacket or evalScalar is called to perform the
- // assignment and account for the cost of the write here.
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; }
- ArgType expression() const { return m_expression; }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
- /// added for sycl in order to construct the buffer from the sycl device
- const Device& device() const{return m_device;}
-
- private:
- TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
- DevicePointer m_buffer;
- const XprType& m_op;
- const ArgType m_expression;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
deleted file mode 100644
index 834ce07..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ /dev/null
@@ -1,633 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
-
-namespace Eigen {
-
-/** \class TensorEvaluator
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The tensor evaluator classes.
- *
- * These classes are responsible for the evaluation of the tensor expression.
- *
- * TODO: add support for more types of expressions, in particular expressions
- * leading to lvalues (slicing, reshaping, etc...)
- */
-
-// Generic evaluator
-template<typename Derived, typename Device>
-struct TensorEvaluator
-{
- typedef typename Derived::Index Index;
- typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename Derived::Dimensions Dimensions;
-
- // NumDimensions is -1 for variable dim tensors
- static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
- internal::traits<Derived>::NumDimensions : 0;
-
- enum {
- IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- Layout = Derived::Layout,
- CoordAccess = NumCoords > 0,
- RawAccess = true
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
- { }
-
- // Used for accessor extraction in SYCL Managed TensorMap:
- const Derived& derived() const { return m_impl; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
- if (dest) {
- m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
- return false;
- }
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- eigen_assert(m_data);
- return m_data[index];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
- eigen_assert(m_data);
- return m_data[index];
- }
-
- template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketReturnType packet(Index index) const
- {
- return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
- }
-
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
- eigen_assert(m_data);
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return m_data[m_dims.IndexOfColMajor(coords)];
- } else {
- return m_data[m_dims.IndexOfRowMajor(coords)];
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) {
- eigen_assert(m_data);
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return m_data[m_dims.IndexOfColMajor(coords)];
- } else {
- return m_data[m_dims.IndexOfRowMajor(coords)];
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
- }
-
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
-
- /// required by sycl in order to construct sycl buffer from raw pointer
- const Device& device() const{return m_device;}
-
- protected:
- typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
- Dimensions m_dims;
- const Device& m_device;
- const Derived& m_impl;
-};
-
-namespace {
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T loadConstant(const T* address) {
- return *address;
-}
-// Use the texture cache on CUDA devices whenever possible
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
-template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-float loadConstant(const float* address) {
- return __ldg(address);
-}
-template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-double loadConstant(const double* address) {
- return __ldg(address);
-}
-template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-Eigen::half loadConstant(const Eigen::half* address) {
- return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
-}
-#endif
-}
-
-
-// Default evaluator for rvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<const Derived, Device>
-{
- typedef typename Derived::Index Index;
- typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename Derived::Dimensions Dimensions;
-
- // NumDimensions is -1 for variable dim tensors
- static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
- internal::traits<Derived>::NumDimensions : 0;
-
- enum {
- IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- Layout = Derived::Layout,
- CoordAccess = NumCoords > 0,
- RawAccess = true
- };
-
- // Used for accessor extraction in SYCL Managed TensorMap:
- const Derived& derived() const { return m_impl; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
- m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
- return false;
- }
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- eigen_assert(m_data);
- return loadConstant(m_data+index);
- }
-
- template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketReturnType packet(Index index) const
- {
- return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
- eigen_assert(m_data);
- const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
- : m_dims.IndexOfRowMajor(coords);
- return loadConstant(m_data+index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
- }
-
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
-
- /// added for sycl in order to construct the buffer from the sycl device
- const Device& device() const{return m_device;}
-
- protected:
- typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
- Dimensions m_dims;
- const Device& m_device;
- const Derived& m_impl;
-};
-
-
-
-
-// -------------------- CwiseNullaryOp --------------------
-
-template<typename NullaryOp, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
-{
- typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
-
- enum {
- IsAligned = true,
- PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC
- TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_wrapper(m_functor, index);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
- /// required by sycl in order to extract the accessor
- NullaryOp functor() const { return m_functor; }
-
-
- private:
- const NullaryOp m_functor;
- TensorEvaluator<ArgType, Device> m_argImpl;
- const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
-};
-
-
-
-// -------------------- CwiseUnaryOp --------------------
-
-template<typename UnaryOp, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
-{
- typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()),
- m_argImpl(op.nestedExpression(), device)
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- m_argImpl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_argImpl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_functor(m_argImpl.coeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
- return m_argImpl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
- /// added for sycl in order to construct the buffer from sycl device
- UnaryOp functor() const { return m_functor; }
-
-
- private:
- const UnaryOp m_functor;
- TensorEvaluator<ArgType, Device> m_argImpl;
-};
-
-
-// -------------------- CwiseBinaryOp --------------------
-
-template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
-struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
-{
- typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
-
- enum {
- IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess &
- internal::functor_traits<BinaryOp>::PacketAccess,
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()),
- m_leftImpl(op.lhsExpression(), device),
- m_rightImpl(op.rhsExpression(), device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
- eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
- {
- // TODO: use right impl instead if right impl dimensions are known at compile time.
- return m_leftImpl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- m_leftImpl.evalSubExprsIfNeeded(NULL);
- m_rightImpl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_leftImpl.cleanup();
- m_rightImpl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
- }
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
- return m_leftImpl.costPerCoeff(vectorized) +
- m_rightImpl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
- /// required by sycl in order to extract the accessor
- BinaryOp functor() const { return m_functor; }
-
- private:
- const BinaryOp m_functor;
- TensorEvaluator<LeftArgType, Device> m_leftImpl;
- TensorEvaluator<RightArgType, Device> m_rightImpl;
-};
-
-// -------------------- CwiseTernaryOp --------------------
-
-template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
-struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
-{
- typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
-
- enum {
- IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
- PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess &
- internal::functor_traits<TernaryOp>::PacketAccess,
- Layout = TensorEvaluator<Arg1Type, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()),
- m_arg1Impl(op.arg1Expression(), device),
- m_arg2Impl(op.arg2Expression(), device),
- m_arg3Impl(op.arg3Expression(), device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
- typename internal::traits<Arg2Type>::StorageKind>::value),
- STORAGE_KIND_MUST_MATCH)
- EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
- typename internal::traits<Arg3Type>::StorageKind>::value),
- STORAGE_KIND_MUST_MATCH)
- EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
- typename internal::traits<Arg2Type>::Index>::value),
- STORAGE_INDEX_MUST_MATCH)
- EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
- typename internal::traits<Arg3Type>::Index>::value),
- STORAGE_INDEX_MUST_MATCH)
-
- eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
- {
- // TODO: use arg2 or arg3 dimensions if they are known at compile time.
- return m_arg1Impl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- m_arg1Impl.evalSubExprsIfNeeded(NULL);
- m_arg2Impl.evalSubExprsIfNeeded(NULL);
- m_arg3Impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_arg1Impl.cleanup();
- m_arg2Impl.cleanup();
- m_arg3Impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
- }
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
- m_arg2Impl.template packet<LoadMode>(index),
- m_arg3Impl.template packet<LoadMode>(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
- return m_arg1Impl.costPerCoeff(vectorized) +
- m_arg2Impl.costPerCoeff(vectorized) +
- m_arg3Impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
-
- private:
- const TernaryOp m_functor;
- TensorEvaluator<Arg1Type, Device> m_arg1Impl;
- TensorEvaluator<Arg2Type, Device> m_arg2Impl;
- TensorEvaluator<Arg3Type, Device> m_arg3Impl;
-};
-
-
-// -------------------- SelectOp --------------------
-
-template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
-struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
-{
- typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
- typedef typename XprType::Scalar Scalar;
-
- enum {
- IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
- internal::packet_traits<Scalar>::HasBlend,
- Layout = TensorEvaluator<IfArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_condImpl(op.ifExpression(), device),
- m_thenImpl(op.thenExpression(), device),
- m_elseImpl(op.elseExpression(), device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
- eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
- eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
- }
-
- typedef typename XprType::Index Index;
- typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
- {
- // TODO: use then or else impl instead if they happen to be known at compile time.
- return m_condImpl.dimensions();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- m_condImpl.evalSubExprsIfNeeded(NULL);
- m_thenImpl.evalSubExprsIfNeeded(NULL);
- m_elseImpl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_condImpl.cleanup();
- m_thenImpl.cleanup();
- m_elseImpl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
- {
- return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
- }
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
- {
- internal::Selector<PacketSize> select;
- for (Index i = 0; i < PacketSize; ++i) {
- select.select[i] = m_condImpl.coeff(index+i);
- }
- return internal::pblend(select,
- m_thenImpl.template packet<LoadMode>(index),
- m_elseImpl.template packet<LoadMode>(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- return m_condImpl.costPerCoeff(vectorized) +
- m_thenImpl.costPerCoeff(vectorized)
- .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
-
- private:
- TensorEvaluator<IfArgType, Device> m_condImpl;
- TensorEvaluator<ThenArgType, Device> m_thenImpl;
- TensorEvaluator<ElseArgType, Device> m_elseImpl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
deleted file mode 100644
index f01d77c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ /dev/null
@@ -1,288 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
-
-namespace Eigen {
-
-/** \class TensorExecutor
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The tensor executor class.
- *
- * This class is responsible for launch the evaluation of the expression on
- * the specified computing device.
- */
-namespace internal {
-
-// Default strategy: the expression is evaluated with a single cpu thread.
-template<typename Expression, typename Device, bool Vectorizable>
-class TensorExecutor
-{
- public:
- typedef typename Expression::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(const Expression& expr, const Device& device = Device())
- {
- TensorEvaluator<Expression, Device> evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign)
- {
- const Index size = array_prod(evaluator.dimensions());
- for (Index i = 0; i < size; ++i) {
- evaluator.evalScalar(i);
- }
- }
- evaluator.cleanup();
- }
-};
-
-
-template<typename Expression>
-class TensorExecutor<Expression, DefaultDevice, true>
-{
- public:
- typedef typename Expression::Index Index;
- EIGEN_DEVICE_FUNC
- static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice())
- {
- TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign)
- {
- const Index size = array_prod(evaluator.dimensions());
- const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
- // Give the compiler a strong hint to unroll the loop. But don't insist
- // on unrolling, because if the function is expensive the compiler should not
- // unroll the loop at the expense of inlining.
- const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize;
- for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) {
- for (Index j = 0; j < 4; j++) {
- evaluator.evalPacket(i + j * PacketSize);
- }
- }
- const Index VectorizedSize = (size / PacketSize) * PacketSize;
- for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) {
- evaluator.evalPacket(i);
- }
- for (Index i = VectorizedSize; i < size; ++i) {
- evaluator.evalScalar(i);
- }
- }
- evaluator.cleanup();
- }
-};
-
-
-
-// Multicore strategy: the index space is partitioned and each partition is executed on a single core
-#ifdef EIGEN_USE_THREADS
-template <typename Evaluator, typename Index, bool Vectorizable>
-struct EvalRange {
- static void run(Evaluator* evaluator_in, const Index first, const Index last) {
- Evaluator evaluator = *evaluator_in;
- eigen_assert(last >= first);
- for (Index i = first; i < last; ++i) {
- evaluator.evalScalar(i);
- }
- }
-
- static Index alignBlockSize(Index size) {
- return size;
- }
-};
-
-template <typename Evaluator, typename Index>
-struct EvalRange<Evaluator, Index, true> {
- static const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
-
- static void run(Evaluator* evaluator_in, const Index first, const Index last) {
- Evaluator evaluator = *evaluator_in;
- eigen_assert(last >= first);
- Index i = first;
- if (last - first >= PacketSize) {
- eigen_assert(first % PacketSize == 0);
- Index last_chunk_offset = last - 4 * PacketSize;
- // Give the compiler a strong hint to unroll the loop. But don't insist
- // on unrolling, because if the function is expensive the compiler should not
- // unroll the loop at the expense of inlining.
- for (; i <= last_chunk_offset; i += 4*PacketSize) {
- for (Index j = 0; j < 4; j++) {
- evaluator.evalPacket(i + j * PacketSize);
- }
- }
- last_chunk_offset = last - PacketSize;
- for (; i <= last_chunk_offset; i += PacketSize) {
- evaluator.evalPacket(i);
- }
- }
- for (; i < last; ++i) {
- evaluator.evalScalar(i);
- }
- }
-
- static Index alignBlockSize(Index size) {
- // Align block size to packet size and account for unrolling in run above.
- if (size >= 16 * PacketSize) {
- return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1);
- }
- // Aligning to 4 * PacketSize would increase block size by more than 25%.
- return (size + PacketSize - 1) & ~(PacketSize - 1);
- }
-};
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
- public:
- typedef typename Expression::Index Index;
- static inline void run(const Expression& expr, const ThreadPoolDevice& device)
- {
- typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
- Evaluator evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign)
- {
- const Index size = array_prod(evaluator.dimensions());
-#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL)
- device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
- EvalRange<Evaluator, Index, Vectorizable>::alignBlockSize,
- [&evaluator](Index first, Index last) {
- EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, first, last);
- });
-#else
- size_t num_threads = device.numThreads();
- if (num_threads > 1) {
- num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
- size, evaluator.costPerCoeff(Vectorizable), num_threads);
- }
- if (num_threads == 1) {
- EvalRange<Evaluator, Index, Vectorizable>::run(&evaluator, 0, size);
- } else {
- const Index PacketSize = Vectorizable ? unpacket_traits<typename Evaluator::PacketReturnType>::size : 1;
- Index blocksz = std::ceil<Index>(static_cast<float>(size)/num_threads) + PacketSize - 1;
- const Index blocksize = numext::maxi<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
- const Index numblocks = size / blocksize;
-
- Barrier barrier(numblocks);
- for (int i = 0; i < numblocks; ++i) {
- device.enqueue_with_barrier(
- &barrier, &EvalRange<Evaluator, Index, Vectorizable>::run,
- &evaluator, i * blocksize, (i + 1) * blocksize);
- }
- if (numblocks * blocksize < size) {
- EvalRange<Evaluator, Index, Vectorizable>::run(
- &evaluator, numblocks * blocksize, size);
- }
- barrier.Wait();
- }
-#endif // defined(!EIGEN_USE_SIMPLE_THREAD_POOL)
- }
- evaluator.cleanup();
- }
-};
-#endif // EIGEN_USE_THREADS
-
-
-// GPU: the evaluation of the expression is offloaded to a GPU.
-#if defined(EIGEN_USE_GPU)
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, GpuDevice, Vectorizable> {
- public:
- typedef typename Expression::Index Index;
- static void run(const Expression& expr, const GpuDevice& device);
-};
-
-
-#if defined(__CUDACC__)
-template <typename Evaluator, typename Index, bool Vectorizable>
-struct EigenMetaKernelEval {
- static __device__ EIGEN_ALWAYS_INLINE
- void run(Evaluator& eval, Index first, Index last, Index step_size) {
- for (Index i = first; i < last; i += step_size) {
- eval.evalScalar(i);
- }
- }
-};
-
-template <typename Evaluator, typename Index>
-struct EigenMetaKernelEval<Evaluator, Index, true> {
- static __device__ EIGEN_ALWAYS_INLINE
- void run(Evaluator& eval, Index first, Index last, Index step_size) {
- const Index PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
- const Index vectorized_size = (last / PacketSize) * PacketSize;
- const Index vectorized_step_size = step_size * PacketSize;
-
- // Use the vector path
- for (Index i = first * PacketSize; i < vectorized_size;
- i += vectorized_step_size) {
- eval.evalPacket(i);
- }
- for (Index i = vectorized_size + first; i < last; i += step_size) {
- eval.evalScalar(i);
- }
- }
-};
-
-template <typename Evaluator, typename Index>
-__global__ void
-__launch_bounds__(1024)
-EigenMetaKernel(Evaluator eval, Index size) {
-
- const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
- const Index step_size = blockDim.x * gridDim.x;
-
- const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned;
- EigenMetaKernelEval<Evaluator, Index, vectorizable>::run(eval, first_index, size, step_size);
-}
-
-/*static*/
-template <typename Expression, bool Vectorizable>
-inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
- const Expression& expr, const GpuDevice& device) {
- TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign) {
- const int block_size = device.maxCudaThreadsPerBlock();
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / block_size;
- const Index size = array_prod(evaluator.dimensions());
- // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
- const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
-
- LAUNCH_CUDA_KERNEL(
- (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>),
- num_blocks, block_size, 0, device, evaluator, size);
- }
- evaluator.cleanup();
-}
-
-#endif // __CUDACC__
-#endif // EIGEN_USE_GPU
-
-// SYCL Executor policy
-#ifdef EIGEN_USE_SYCL
-
-template <typename Expression, bool Vectorizable>
-class TensorExecutor<Expression, SyclDevice, Vectorizable> {
-public:
- static inline void run(const Expression &expr, const SyclDevice &device) {
- // call TensorSYCL module
- TensorSycl::run(expr, device);
- }
-};
-
-#endif
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
deleted file mode 100644
index 85dfc7a..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ /dev/null
@@ -1,371 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
-
-namespace Eigen {
-
-/** \class TensorExpr
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor expression classes.
- *
- * The TensorCwiseNullaryOp class applies a nullary operators to an expression.
- * This is typically used to generate constants.
- *
- * The TensorCwiseUnaryOp class represents an expression where a unary operator
- * (e.g. cwiseSqrt) is applied to an expression.
- *
- * The TensorCwiseBinaryOp class represents an expression where a binary
- * operator (e.g. addition) is applied to a lhs and a rhs expression.
- *
- */
-namespace internal {
-template<typename NullaryOp, typename XprType>
-struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> >
- : traits<XprType>
-{
- typedef traits<XprType> XprTraits;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::Nested XprTypeNested;
- typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-
- enum {
- Flags = 0
- };
-};
-
-} // end namespace internal
-
-
-
-template<typename NullaryOp, typename XprType>
-class TensorCwiseNullaryOp : public TensorBase<TensorCwiseNullaryOp<NullaryOp, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef TensorCwiseNullaryOp<NullaryOp, XprType> Nested;
- typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorCwiseNullaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp())
- : m_xpr(xpr), m_functor(func) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- nestedExpression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- const NullaryOp& functor() const { return m_functor; }
-
- protected:
- typename XprType::Nested m_xpr;
- const NullaryOp m_functor;
-};
-
-
-
-namespace internal {
-template<typename UnaryOp, typename XprType>
-struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
- : traits<XprType>
-{
- // TODO(phli): Add InputScalar, InputPacket. Check references to
- // current Scalar/Packet to see if the intent is Input or Output.
- typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprType::Nested XprTypeNested;
- typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename UnaryOp, typename XprType>
-struct eval<TensorCwiseUnaryOp<UnaryOp, XprType>, Eigen::Dense>
-{
- typedef const TensorCwiseUnaryOp<UnaryOp, XprType>& type;
-};
-
-template<typename UnaryOp, typename XprType>
-struct nested<TensorCwiseUnaryOp<UnaryOp, XprType>, 1, typename eval<TensorCwiseUnaryOp<UnaryOp, XprType> >::type>
-{
- typedef TensorCwiseUnaryOp<UnaryOp, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename UnaryOp, typename XprType>
-class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors>
-{
- public:
- // TODO(phli): Add InputScalar, InputPacket. Check references to
- // current Scalar/Packet to see if the intent is Input or Output.
- typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef Scalar CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
- : m_xpr(xpr), m_functor(func) {}
-
- EIGEN_DEVICE_FUNC
- const UnaryOp& functor() const { return m_functor; }
-
- /** \returns the nested expression */
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- nestedExpression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const UnaryOp m_functor;
-};
-
-
-namespace internal {
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs
- // are different.
- // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to
- // current Scalar/Packet to see if the intent is Inputs or Output.
- typedef typename result_of<
- BinaryOp(typename LhsXprType::Scalar,
- typename RhsXprType::Scalar)>::type Scalar;
- typedef traits<LhsXprType> XprTraits;
- typedef typename promote_storage_type<
- typename traits<LhsXprType>::StorageKind,
- typename traits<RhsXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<
- typename traits<LhsXprType>::Index,
- typename traits<RhsXprType>::Index>::type Index;
- typedef typename LhsXprType::Nested LhsNested;
- typedef typename RhsXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-
- enum {
- Flags = 0
- };
-};
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, Eigen::Dense>
-{
- typedef const TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>& type;
-};
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-struct nested<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, 1, typename eval<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >::type>
-{
- typedef TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
-class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors>
-{
- public:
- // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to
- // current Scalar/Packet to see if the intent is Inputs or Output.
- typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef Scalar CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
- : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {}
-
- EIGEN_DEVICE_FUNC
- const BinaryOp& functor() const { return m_functor; }
-
- /** \returns the nested expressions */
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename LhsXprType::Nested>::type&
- lhsExpression() const { return m_lhs_xpr; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename RhsXprType::Nested>::type&
- rhsExpression() const { return m_rhs_xpr; }
-
- protected:
- typename LhsXprType::Nested m_lhs_xpr;
- typename RhsXprType::Nested m_rhs_xpr;
- const BinaryOp m_functor;
-};
-
-
-namespace internal {
-template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
-struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >
-{
- // Type promotion to handle the case where the types of the args are different.
- typedef typename result_of<
- TernaryOp(typename Arg1XprType::Scalar,
- typename Arg2XprType::Scalar,
- typename Arg3XprType::Scalar)>::type Scalar;
- typedef traits<Arg1XprType> XprTraits;
- typedef typename traits<Arg1XprType>::StorageKind StorageKind;
- typedef typename traits<Arg1XprType>::Index Index;
- typedef typename Arg1XprType::Nested Arg1Nested;
- typedef typename Arg2XprType::Nested Arg2Nested;
- typedef typename Arg3XprType::Nested Arg3Nested;
- typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
- typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
- typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-
- enum {
- Flags = 0
- };
-};
-
-template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
-struct eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, Eigen::Dense>
-{
- typedef const TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>& type;
-};
-
-template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
-struct nested<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, 1, typename eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >::type>
-{
- typedef TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
-class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef Scalar CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorCwiseTernaryOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp())
- : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {}
-
- EIGEN_DEVICE_FUNC
- const TernaryOp& functor() const { return m_functor; }
-
- /** \returns the nested expressions */
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename Arg1XprType::Nested>::type&
- arg1Expression() const { return m_arg1_xpr; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename Arg2XprType::Nested>::type&
- arg2Expression() const { return m_arg2_xpr; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename Arg3XprType::Nested>::type&
- arg3Expression() const { return m_arg3_xpr; }
-
- protected:
- typename Arg1XprType::Nested m_arg1_xpr;
- typename Arg2XprType::Nested m_arg2_xpr;
- typename Arg3XprType::Nested m_arg3_xpr;
- const TernaryOp m_functor;
-};
-
-
-namespace internal {
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
- : traits<ThenXprType>
-{
- typedef typename traits<ThenXprType>::Scalar Scalar;
- typedef traits<ThenXprType> XprTraits;
- typedef typename promote_storage_type<typename traits<ThenXprType>::StorageKind,
- typename traits<ElseXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<ElseXprType>::Index,
- typename traits<ThenXprType>::Index>::type Index;
- typedef typename IfXprType::Nested IfNested;
- typedef typename ThenXprType::Nested ThenNested;
- typedef typename ElseXprType::Nested ElseNested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, Eigen::Dense>
-{
- typedef const TensorSelectOp<IfXprType, ThenXprType, ElseXprType>& type;
-};
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-struct nested<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, 1, typename eval<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >::type>
-{
- typedef TensorSelectOp<IfXprType, ThenXprType, ElseXprType> type;
-};
-
-} // end namespace internal
-
-
-template<typename IfXprType, typename ThenXprType, typename ElseXprType>
-class TensorSelectOp : public TensorBase<TensorSelectOp<IfXprType, ThenXprType, ElseXprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorSelectOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::promote_storage_type<typename ThenXprType::CoeffReturnType,
- typename ElseXprType::CoeffReturnType>::ret CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorSelectOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorSelectOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorSelectOp>::Index Index;
-
- EIGEN_DEVICE_FUNC
- TensorSelectOp(const IfXprType& a_condition,
- const ThenXprType& a_then,
- const ElseXprType& a_else)
- : m_condition(a_condition), m_then(a_then), m_else(a_else)
- { }
-
- EIGEN_DEVICE_FUNC
- const IfXprType& ifExpression() const { return m_condition; }
-
- EIGEN_DEVICE_FUNC
- const ThenXprType& thenExpression() const { return m_then; }
-
- EIGEN_DEVICE_FUNC
- const ElseXprType& elseExpression() const { return m_else; }
-
- protected:
- typename IfXprType::Nested m_condition;
- typename ThenXprType::Nested m_then;
- typename ElseXprType::Nested m_else;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
deleted file mode 100644
index 08eb559..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ /dev/null
@@ -1,651 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
-
-// This code requires the ability to initialize arrays of constant
-// values directly inside a class.
-#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900
-
-namespace Eigen {
-
-/** \class TensorFFT
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor FFT class.
- *
- * TODO:
- * Vectorize the Cooley Tukey and the Bluestein algorithm
- * Add support for multithreaded evaluation
- * Improve the performance on GPU
- */
-
-template <bool NeedUprade> struct MakeComplex {
- template <typename T>
- EIGEN_DEVICE_FUNC
- T operator() (const T& val) const { return val; }
-};
-
-template <> struct MakeComplex<true> {
- template <typename T>
- EIGEN_DEVICE_FUNC
- std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
-};
-
-template <> struct MakeComplex<false> {
- template <typename T>
- EIGEN_DEVICE_FUNC
- std::complex<T> operator() (const std::complex<T>& val) const { return val; }
-};
-
-template <int ResultType> struct PartOf {
- template <typename T> T operator() (const T& val) const { return val; }
-};
-
-template <> struct PartOf<RealPart> {
- template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
-};
-
-template <> struct PartOf<ImagPart> {
- template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
-};
-
-namespace internal {
-template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
-struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
- typedef traits<XprType> XprTraits;
- typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
- typedef typename XprTraits::Scalar InputScalar;
- typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
-struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
- typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
-};
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
-struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
- typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
-};
-
-} // end namespace internal
-
-template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
-class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
- public:
- typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
- typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
- typedef OutputScalar CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
- : m_xpr(expr), m_fft(fft) {}
-
- EIGEN_DEVICE_FUNC
- const FFT& fft() const { return m_fft; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
- return m_xpr;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const FFT m_fft;
-};
-
-// Eval as rvalue
-template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
-struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
- typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename std::complex<RealScalar> ComplexScalar;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
- typedef internal::traits<XprType> XprTraits;
- typedef typename XprTraits::Scalar InputScalar;
- typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
- typedef OutputScalar CoeffReturnType;
- typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = true,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- for (int i = 0; i < NumDims; ++i) {
- eigen_assert(input_dims[i] > 0);
- m_dimensions[i] = input_dims[i];
- }
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
- }
- } else {
- m_strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
- }
- }
- m_size = m_dimensions.TotalSize();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
- return m_dimensions;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
- m_impl.evalSubExprsIfNeeded(NULL);
- if (data) {
- evalToBuf(data);
- return false;
- } else {
- m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
- evalToBuf(m_data);
- return true;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- if (m_data) {
- m_device.deallocate(m_data);
- m_data = NULL;
- }
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
- return m_data[index];
- }
-
- template <int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType
- packet(Index index) const {
- return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
-
-
- private:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
- const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
- ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
-
- for (Index i = 0; i < m_size; ++i) {
- buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
- }
-
- for (size_t i = 0; i < m_fft.size(); ++i) {
- Index dim = m_fft[i];
- eigen_assert(dim >= 0 && dim < NumDims);
- Index line_len = m_dimensions[dim];
- eigen_assert(line_len >= 1);
- ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
- const bool is_power_of_two = isPowerOfTwo(line_len);
- const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
- const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
-
- ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
- ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
- ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
- if (!is_power_of_two) {
- // Compute twiddle factors
- // t_n = exp(sqrt(-1) * pi * n^2 / line_len)
- // for n = 0, 1,..., line_len-1.
- // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
- pos_j_base_powered[0] = ComplexScalar(1, 0);
- if (line_len > 1) {
- const RealScalar pi_over_len(EIGEN_PI / line_len);
- const ComplexScalar pos_j_base = ComplexScalar(
- std::cos(pi_over_len), std::sin(pi_over_len));
- pos_j_base_powered[1] = pos_j_base;
- if (line_len > 2) {
- const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
- for (int j = 2; j < line_len + 1; ++j) {
- pos_j_base_powered[j] = pos_j_base_powered[j - 1] *
- pos_j_base_powered[j - 1] /
- pos_j_base_powered[j - 2] * pos_j_base_sq;
- }
- }
- }
- }
-
- for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
- const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
-
- // get data into line_buf
- const Index stride = m_strides[dim];
- if (stride == 1) {
- memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
- } else {
- Index offset = base_offset;
- for (int j = 0; j < line_len; ++j, offset += stride) {
- line_buf[j] = buf[offset];
- }
- }
-
- // processs the line
- if (is_power_of_two) {
- processDataLineCooleyTukey(line_buf, line_len, log_len);
- }
- else {
- processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
- }
-
- // write back
- if (FFTDir == FFT_FORWARD && stride == 1) {
- memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
- } else {
- Index offset = base_offset;
- const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
- for (int j = 0; j < line_len; ++j, offset += stride) {
- buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
- }
- }
- }
- m_device.deallocate(line_buf);
- if (!is_power_of_two) {
- m_device.deallocate(a);
- m_device.deallocate(b);
- m_device.deallocate(pos_j_base_powered);
- }
- }
-
- if(!write_to_out) {
- for (Index i = 0; i < m_size; ++i) {
- data[i] = PartOf<FFTResultType>()(buf[i]);
- }
- m_device.deallocate(buf);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
- eigen_assert(x > 0);
- return !(x & (x - 1));
- }
-
- // The composite number for padding, used in Bluestein's FFT algorithm
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
- Index i = 2;
- while (i < 2 * n - 1) i *= 2;
- return i;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
- Index log2m = 0;
- while (m >>= 1) log2m++;
- return log2m;
- }
-
- // Call Cooley Tukey algorithm directly, data length must be power of 2
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
- eigen_assert(isPowerOfTwo(line_len));
- scramble_FFT(line_buf, line_len);
- compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
- }
-
- // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
- Index n = line_len;
- Index m = good_composite;
- ComplexScalar* data = line_buf;
-
- for (Index i = 0; i < n; ++i) {
- if(FFTDir == FFT_FORWARD) {
- a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
- }
- else {
- a[i] = data[i] * pos_j_base_powered[i];
- }
- }
- for (Index i = n; i < m; ++i) {
- a[i] = ComplexScalar(0, 0);
- }
-
- for (Index i = 0; i < n; ++i) {
- if(FFTDir == FFT_FORWARD) {
- b[i] = pos_j_base_powered[i];
- }
- else {
- b[i] = numext::conj(pos_j_base_powered[i]);
- }
- }
- for (Index i = n; i < m - n; ++i) {
- b[i] = ComplexScalar(0, 0);
- }
- for (Index i = m - n; i < m; ++i) {
- if(FFTDir == FFT_FORWARD) {
- b[i] = pos_j_base_powered[m-i];
- }
- else {
- b[i] = numext::conj(pos_j_base_powered[m-i]);
- }
- }
-
- scramble_FFT(a, m);
- compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
-
- scramble_FFT(b, m);
- compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
-
- for (Index i = 0; i < m; ++i) {
- a[i] *= b[i];
- }
-
- scramble_FFT(a, m);
- compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
-
- //Do the scaling after ifft
- for (Index i = 0; i < m; ++i) {
- a[i] /= m;
- }
-
- for (Index i = 0; i < n; ++i) {
- if(FFTDir == FFT_FORWARD) {
- data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
- }
- else {
- data[i] = a[i] * pos_j_base_powered[i];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
- eigen_assert(isPowerOfTwo(n));
- Index j = 1;
- for (Index i = 1; i < n; ++i){
- if (j > i) {
- std::swap(data[j-1], data[i-1]);
- }
- Index m = n >> 1;
- while (m >= 2 && j > m) {
- j -= m;
- m >>= 1;
- }
- j += m;
- }
- }
-
- template <int Dir>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
- ComplexScalar tmp = data[1];
- data[1] = data[0] - data[1];
- data[0] += tmp;
- }
-
- template <int Dir>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
- ComplexScalar tmp[4];
- tmp[0] = data[0] + data[1];
- tmp[1] = data[0] - data[1];
- tmp[2] = data[2] + data[3];
- if (Dir == FFT_FORWARD) {
- tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
- } else {
- tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
- }
- data[0] = tmp[0] + tmp[2];
- data[1] = tmp[1] + tmp[3];
- data[2] = tmp[0] - tmp[2];
- data[3] = tmp[1] - tmp[3];
- }
-
- template <int Dir>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
- ComplexScalar tmp_1[8];
- ComplexScalar tmp_2[8];
-
- tmp_1[0] = data[0] + data[1];
- tmp_1[1] = data[0] - data[1];
- tmp_1[2] = data[2] + data[3];
- if (Dir == FFT_FORWARD) {
- tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
- } else {
- tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
- }
- tmp_1[4] = data[4] + data[5];
- tmp_1[5] = data[4] - data[5];
- tmp_1[6] = data[6] + data[7];
- if (Dir == FFT_FORWARD) {
- tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
- } else {
- tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
- }
- tmp_2[0] = tmp_1[0] + tmp_1[2];
- tmp_2[1] = tmp_1[1] + tmp_1[3];
- tmp_2[2] = tmp_1[0] - tmp_1[2];
- tmp_2[3] = tmp_1[1] - tmp_1[3];
- tmp_2[4] = tmp_1[4] + tmp_1[6];
-// SQRT2DIV2 = sqrt(2)/2
-#define SQRT2DIV2 0.7071067811865476
- if (Dir == FFT_FORWARD) {
- tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
- tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
- tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
- } else {
- tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
- tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
- tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
- }
- data[0] = tmp_2[0] + tmp_2[4];
- data[1] = tmp_2[1] + tmp_2[5];
- data[2] = tmp_2[2] + tmp_2[6];
- data[3] = tmp_2[3] + tmp_2[7];
- data[4] = tmp_2[0] - tmp_2[4];
- data[5] = tmp_2[1] - tmp_2[5];
- data[6] = tmp_2[2] - tmp_2[6];
- data[7] = tmp_2[3] - tmp_2[7];
- }
-
- template <int Dir>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
- ComplexScalar* data, Index n, Index n_power_of_2) {
- // Original code:
- // RealScalar wtemp = std::sin(M_PI/n);
- // RealScalar wpi = -std::sin(2 * M_PI/n);
- const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
- const RealScalar wpi = (Dir == FFT_FORWARD)
- ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
- : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
-
- const ComplexScalar wp(wtemp, wpi);
- const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
- const ComplexScalar wp_one_2 = wp_one * wp_one;
- const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
- const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
- const Index n2 = n / 2;
- ComplexScalar w(1.0, 0.0);
- for (Index i = 0; i < n2; i += 4) {
- ComplexScalar temp0(data[i + n2] * w);
- ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
- ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
- ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
- w = w * wp_one_4;
-
- data[i + n2] = data[i] - temp0;
- data[i] += temp0;
-
- data[i + 1 + n2] = data[i + 1] - temp1;
- data[i + 1] += temp1;
-
- data[i + 2 + n2] = data[i + 2] - temp2;
- data[i + 2] += temp2;
-
- data[i + 3 + n2] = data[i + 3] - temp3;
- data[i + 3] += temp3;
- }
- }
-
- template <int Dir>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
- ComplexScalar* data, Index n, Index n_power_of_2) {
- eigen_assert(isPowerOfTwo(n));
- if (n > 8) {
- compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
- compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
- butterfly_1D_merge<Dir>(data, n, n_power_of_2);
- } else if (n == 8) {
- butterfly_8<Dir>(data);
- } else if (n == 4) {
- butterfly_4<Dir>(data);
- } else if (n == 2) {
- butterfly_2<Dir>(data);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
- Index result = 0;
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > omitted_dim; --i) {
- const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
- const Index idx = index / partial_m_stride;
- index -= idx * partial_m_stride;
- result += idx * m_strides[i];
- }
- result += index;
- }
- else {
- for (Index i = 0; i < omitted_dim; ++i) {
- const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
- const Index idx = index / partial_m_stride;
- index -= idx * partial_m_stride;
- result += idx * m_strides[i];
- }
- result += index;
- }
- // Value of index_coords[omitted_dim] is not determined to this step
- return result;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
- Index result = base + offset * m_strides[omitted_dim] ;
- return result;
- }
-
- protected:
- Index m_size;
- const FFT& m_fft;
- Dimensions m_dimensions;
- array<Index, NumDims> m_strides;
- TensorEvaluator<ArgType, Device> m_impl;
- CoeffReturnType* m_data;
- const Device& m_device;
-
- // This will support a maximum FFT size of 2^32 for each dimension
- // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
- const RealScalar m_sin_PI_div_n_LUT[32] = {
- RealScalar(0.0),
- RealScalar(-2),
- RealScalar(-0.999999999999999),
- RealScalar(-0.292893218813453),
- RealScalar(-0.0761204674887130),
- RealScalar(-0.0192147195967696),
- RealScalar(-0.00481527332780311),
- RealScalar(-0.00120454379482761),
- RealScalar(-3.01181303795779e-04),
- RealScalar(-7.52981608554592e-05),
- RealScalar(-1.88247173988574e-05),
- RealScalar(-4.70619042382852e-06),
- RealScalar(-1.17654829809007e-06),
- RealScalar(-2.94137117780840e-07),
- RealScalar(-7.35342821488550e-08),
- RealScalar(-1.83835707061916e-08),
- RealScalar(-4.59589268710903e-09),
- RealScalar(-1.14897317243732e-09),
- RealScalar(-2.87243293150586e-10),
- RealScalar( -7.18108232902250e-11),
- RealScalar(-1.79527058227174e-11),
- RealScalar(-4.48817645568941e-12),
- RealScalar(-1.12204411392298e-12),
- RealScalar(-2.80511028480785e-13),
- RealScalar(-7.01277571201985e-14),
- RealScalar(-1.75319392800498e-14),
- RealScalar(-4.38298482001247e-15),
- RealScalar(-1.09574620500312e-15),
- RealScalar(-2.73936551250781e-16),
- RealScalar(-6.84841378126949e-17),
- RealScalar(-1.71210344531737e-17),
- RealScalar(-4.28025861329343e-18)
- };
-
- // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
- const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
- RealScalar(0.0),
- RealScalar(0.0),
- RealScalar(-1.00000000000000e+00),
- RealScalar(-7.07106781186547e-01),
- RealScalar(-3.82683432365090e-01),
- RealScalar(-1.95090322016128e-01),
- RealScalar(-9.80171403295606e-02),
- RealScalar(-4.90676743274180e-02),
- RealScalar(-2.45412285229123e-02),
- RealScalar(-1.22715382857199e-02),
- RealScalar(-6.13588464915448e-03),
- RealScalar(-3.06795676296598e-03),
- RealScalar(-1.53398018628477e-03),
- RealScalar(-7.66990318742704e-04),
- RealScalar(-3.83495187571396e-04),
- RealScalar(-1.91747597310703e-04),
- RealScalar(-9.58737990959773e-05),
- RealScalar(-4.79368996030669e-05),
- RealScalar(-2.39684498084182e-05),
- RealScalar(-1.19842249050697e-05),
- RealScalar(-5.99211245264243e-06),
- RealScalar(-2.99605622633466e-06),
- RealScalar(-1.49802811316901e-06),
- RealScalar(-7.49014056584716e-07),
- RealScalar(-3.74507028292384e-07),
- RealScalar(-1.87253514146195e-07),
- RealScalar(-9.36267570730981e-08),
- RealScalar(-4.68133785365491e-08),
- RealScalar(-2.34066892682746e-08),
- RealScalar(-1.17033446341373e-08),
- RealScalar(-5.85167231706864e-09),
- RealScalar(-2.92583615853432e-09)
- };
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_HAS_CONSTEXPR
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
deleted file mode 100644
index fcee5f6..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
+++ /dev/null
@@ -1,389 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
-
-namespace Eigen {
-
-/** \class TensorFixedSize
- * \ingroup CXX11_Tensor_Module
- *
- * \brief The fixed sized version of the tensor class.
- *
- * The fixed sized equivalent of
- * Eigen::Tensor<float, 3> t(3, 5, 7);
- * is
- * Eigen::TensorFixedSize<float, Size<3,5,7>> t;
- */
-
-template<typename Scalar_, typename Dimensions_, int Options_, typename IndexType>
-class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> >
-{
- public:
- typedef TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> Self;
- typedef TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_, IndexType> > Base;
- typedef typename Eigen::internal::nested<Self>::type Nested;
- typedef typename internal::traits<Self>::StorageKind StorageKind;
- typedef typename internal::traits<Self>::Index Index;
- typedef Scalar_ Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename Base::CoeffReturnType CoeffReturnType;
-
- static const int Options = Options_;
-
- enum {
- IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0),
- Layout = Options_ & RowMajor ? RowMajor : ColMajor,
- CoordAccess = true,
- RawAccess = true
- };
-
- typedef Dimensions_ Dimensions;
- static const std::size_t NumIndices = Dimensions::count;
-
- protected:
- TensorStorage<Scalar, Dimensions, Options> m_storage;
-
- public:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); }
-
- // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
- // work, because that uses base().coeffRef() - and we don't yet
- // implement a similar class hierarchy
- inline Self& base() { return *this; }
- inline const Self& base() const { return *this; }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeff(array<Index, NumIndices>{{firstIndex, otherIndices...}});
- }
-#endif
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeff(const array<Index, NumIndices>& indices) const
- {
- eigen_internal_assert(checkIndexRange(indices));
- return m_storage.data()[linearizedIndex(indices)];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_storage.data()[index];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& coeff() const
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return m_storage.data()[0];
- }
-
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeffRef(array<Index, NumIndices>{{firstIndex, otherIndices...}});
- }
-#endif
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
- {
- eigen_internal_assert(checkIndexRange(indices));
- return m_storage.data()[linearizedIndex(indices)];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_storage.data()[index];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return m_storage.data()[0];
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
- {
- if (Options&RowMajor) {
- const Index index = i1 + i0 * m_storage.dimensions()[1];
- return m_storage.data()[index];
- } else {
- const Index index = i0 + i1 * m_storage.dimensions()[0];
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
- {
- if (Options&RowMajor) {
- const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
- {
- if (Options&RowMajor) {
- const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
- {
- if (Options&RowMajor) {
- const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
- return m_storage.data()[index];
- }
- }
-#endif
-
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
- {
- eigen_assert(checkIndexRange(indices));
- return coeff(indices);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
- {
- eigen_internal_assert(index >= 0 && index < size());
- return coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()() const
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeff();
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const
- {
- // The bracket operator is only for vectors, use the parenthesis operator instead.
- EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeff(index);
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
- {
- // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
- {
- if (Options&RowMajor) {
- const Index index = i1 + i0 * m_storage.dimensions()[1];
- return m_storage.data()[index];
- } else {
- const Index index = i0 + i1 * m_storage.dimensions()[0];
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
- {
- if (Options&RowMajor) {
- const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0);
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
- {
- if (Options&RowMajor) {
- const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0));
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
- return m_storage.data()[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
- {
- if (Options&RowMajor) {
- const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)));
- return m_storage.data()[index];
- } else {
- const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
- return m_storage.data()[index];
- }
- }
-#endif
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
- {
- eigen_assert(checkIndexRange(indices));
- return coeffRef(indices);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index index)
- {
- eigen_assert(index >= 0 && index < size());
- return coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return coeffRef();
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator[](Index index)
- {
- // The bracket operator is only for vectors, use the parenthesis operator instead
- EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize()
- : m_storage()
- {
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize(const Self& other)
- : m_storage(other.m_storage)
- {
- }
-
-#if EIGEN_HAS_RVALUE_REFERENCES
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other)
- : m_storage(other.m_storage)
- {
- }
-#endif
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, ReadOnlyAccessors>& other)
- {
- typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign;
- Assign assign(*this, other.derived());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- }
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase<OtherDerived, WriteAccessors>& other)
- {
- typedef TensorAssignOp<TensorFixedSize, const OtherDerived> Assign;
- Assign assign(*this, other.derived());
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other)
- {
- // FIXME: check that the dimensions of other match the dimensions of *this.
- // Unfortunately this isn't possible yet when the rhs is an expression.
- typedef TensorAssignOp<Self, const TensorFixedSize> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other)
- {
- // FIXME: check that the dimensions of other match the dimensions of *this.
- // Unfortunately this isn't possible yet when the rhs is an expression.
- typedef TensorAssignOp<Self, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE bool checkIndexRange(const array<Index, NumIndices>& /*indices*/) const
- {
- using internal::array_apply_and_reduce;
- using internal::array_zip_and_reduce;
- using internal::greater_equal_zero_op;
- using internal::logical_and_op;
- using internal::lesser_op;
-
- return true;
- // check whether the indices are all >= 0
- /* array_apply_and_reduce<logical_and_op, greater_equal_zero_op>(indices) &&
- // check whether the indices fit in the dimensions
- array_zip_and_reduce<logical_and_op, lesser_op>(indices, m_storage.dimensions());*/
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index linearizedIndex(const array<Index, NumIndices>& indices) const
- {
- if (Options&RowMajor) {
- return m_storage.dimensions().IndexOfRowMajor(indices);
- } else {
- return m_storage.dimensions().IndexOfColMajor(indices);
- }
- }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
deleted file mode 100644
index 8bece4e..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
-
-namespace Eigen {
-
-namespace internal {
-template<typename XprType, template <class> class MakePointer_>
-struct traits<TensorForcedEvalOp<XprType, MakePointer_> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename traits<XprType>::StorageKind StorageKind;
- typedef typename traits<XprType>::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-
- enum {
- Flags = 0
- };
- template <class T> struct MakePointer {
- // Intermediate typedef to workaround MSVC issue.
- typedef MakePointer_<T> MakePointerT;
- typedef typename MakePointerT::Type Type;
- };
-};
-
-template<typename XprType, template <class> class MakePointer_>
-struct eval<TensorForcedEvalOp<XprType, MakePointer_>, Eigen::Dense>
-{
- typedef const TensorForcedEvalOp<XprType, MakePointer_>& type;
-};
-
-template<typename XprType, template <class> class MakePointer_>
-struct nested<TensorForcedEvalOp<XprType, MakePointer_>, 1, typename eval<TensorForcedEvalOp<XprType, MakePointer_> >::type>
-{
- typedef TensorForcedEvalOp<XprType, MakePointer_> type;
-};
-
-} // end namespace internal
-
-
-
-// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_)
-
-/** \class TensorForcedEvalOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor reshaping class.
- *
- *
- */
-/// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer.
-/// It is added due to the fact that for our device compiler `T*` is not allowed.
-/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`.
-/// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_<T>` is `T*` .
-/// Therefore, by adding the default value, we managed to convert the type and it does not break any
-/// existing code as its default value is `T*`.
-template<typename XprType, template <class> class MakePointer_>
-class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType, MakePointer_>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr)
- : m_xpr(expr) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
-};
-
-
-template<typename ArgType, typename Device, template <class> class MakePointer_>
-struct TensorEvaluator<const TensorForcedEvalOp<ArgType, MakePointer_>, Device>
-{
- typedef TensorForcedEvalOp<ArgType, MakePointer_> XprType;
- typedef typename ArgType::Scalar Scalar;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
- typedef typename XprType::Index Index;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = true,
- PacketAccess = (PacketSize > 1),
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = true
- };
-
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- /// op_ is used for sycl
- : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
- { }
-
- EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- const Index numValues = internal::array_prod(m_impl.dimensions());
- m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
- // Should initialize the memory in case we're dealing with non POD types.
- if (NumTraits<CoeffReturnType>::RequireInitialization) {
- for (Index i = 0; i < numValues; ++i) {
- new(m_buffer+i) CoeffReturnType();
- }
- }
- typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
- EvalTo evalToTmp(m_buffer, m_op);
- const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_device.deallocate(m_buffer);
- m_buffer = NULL;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_buffer[index];
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC typename MakePointer<Scalar>::Type data() const { return m_buffer; }
-
- /// required by sycl in order to extract the sycl accessor
- const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
- /// used by sycl in order to build the sycl buffer
- const Device& device() const{return m_device;}
- private:
- TensorEvaluator<ArgType, Device> m_impl;
- const ArgType m_op;
- const Device& m_device;
- typename MakePointer<CoeffReturnType>::Type m_buffer;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
deleted file mode 100644
index 52b803d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ /dev/null
@@ -1,109 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
-
-namespace Eigen {
-
-// MakePointer class is used as a container of the adress space of the pointer
-// on the host and on the device. From the host side it generates the T* pointer
-// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
-// T* m_data on the host. It is always called on the device.
-// Specialisation of MakePointer class for creating the sycl buffer with
-// map_allocator.
-template<typename T> struct MakePointer {
- typedef T* Type;
-};
-
-template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
-template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
-template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
-template<typename PlainObjectType> class TensorRef;
-template<typename Derived, int AccessLevel> class TensorBase;
-
-template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
-template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
-template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
-template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp;
-template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
-template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_ = MakePointer > class TensorReductionOp;
-template<typename XprType> class TensorIndexTupleOp;
-template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
-template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
-template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp;
-template<typename TargetType, typename XprType> class TensorConversionOp;
-template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
-template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp;
-template<typename PatchDim, typename XprType> class TensorPatchOp;
-template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp;
-template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
-template<DenseIndex DimId, typename XprType> class TensorChippingOp;
-template<typename NewDimensions, typename XprType> class TensorReshapingOp;
-template<typename XprType> class TensorLayoutSwapOp;
-template<typename StartIndices, typename Sizes, typename XprType> class TensorSlicingOp;
-template<typename ReverseDimensions, typename XprType> class TensorReverseOp;
-template<typename PaddingDimensions, typename XprType> class TensorPaddingOp;
-template<typename Shuffle, typename XprType> class TensorShufflingOp;
-template<typename Strides, typename XprType> class TensorStridingOp;
-template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> class TensorStridingSlicingOp;
-template<typename Strides, typename XprType> class TensorInflationOp;
-template<typename Generator, typename XprType> class TensorGeneratorOp;
-template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
-template<typename Op, typename XprType> class TensorScanOp;
-
-template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
-template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
-
-template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
-template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
-
-template<typename ExpressionType, typename DeviceType> class TensorDevice;
-template<typename Derived, typename Device> struct TensorEvaluator;
-
-struct DefaultDevice;
-struct ThreadPoolDevice;
-struct GpuDevice;
-struct SyclDevice;
-
-enum FFTResultType {
- RealPart = 0,
- ImagPart = 1,
- BothParts = 2
-};
-
-enum FFTDirection {
- FFT_FORWARD = 0,
- FFT_REVERSE = 1
-};
-
-
-namespace internal {
-
-template <typename Device, typename Expression>
-struct IsVectorizable {
- static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
-};
-
-template <typename Expression>
-struct IsVectorizable<GpuDevice, Expression> {
- static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
- TensorEvaluator<Expression, GpuDevice>::IsAligned;
-};
-
-template <typename Expression, typename Device,
- bool Vectorizable = IsVectorizable<Device, Expression>::value>
-class TensorExecutor;
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
deleted file mode 100644
index d73f6dc..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ /dev/null
@@ -1,489 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
-
-namespace Eigen {
-namespace internal {
-
-
-/** \internal
- * \brief Template functor to compute the modulo between an array and a scalar.
- */
-template <typename Scalar>
-struct scalar_mod_op {
- EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
- EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
- const Scalar m_divisor;
-};
-template <typename Scalar>
-struct functor_traits<scalar_mod_op<Scalar> >
-{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
-
-
-/** \internal
- * \brief Template functor to compute the modulo between 2 arrays.
- */
-template <typename Scalar>
-struct scalar_mod2_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op);
- EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
-};
-template <typename Scalar>
-struct functor_traits<scalar_mod2_op<Scalar> >
-{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
-
-template <typename Scalar>
-struct scalar_fmod_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op);
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
- operator()(const Scalar& a, const Scalar& b) const {
- return numext::fmod(a, b);
- }
-};
-template <typename Scalar>
-struct functor_traits<scalar_fmod_op<Scalar> > {
- enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell.
- PacketAccess = false };
-};
-
-
-/** \internal
- * \brief Template functor to compute the sigmoid of a scalar
- * \sa class CwiseUnaryOp, ArrayBase::sigmoid()
- */
-template <typename T>
-struct scalar_sigmoid_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
- const T one = T(1);
- return one / (one + numext::exp(-x));
- }
-
- template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Packet packetOp(const Packet& x) const {
- const Packet one = pset1<Packet>(T(1));
- return pdiv(one, padd(one, pexp(pnegate(x))));
- }
-};
-
-template <typename T>
-struct functor_traits<scalar_sigmoid_op<T> > {
- enum {
- Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
- PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
- packet_traits<T>::HasNegate && packet_traits<T>::HasExp
- };
-};
-
-
-template<typename Reducer, typename Device>
-struct reducer_traits {
- enum {
- Cost = 1,
- PacketAccess = false
- };
-};
-
-// Standard reduction functors
-template <typename T> struct SumReducer
-{
- static const bool PacketAccess = packet_traits<T>::HasAdd;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- internal::scalar_sum_op<T> sum_op;
- *accum = sum_op(*accum, t);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
- (*accum) = padd<Packet>(*accum, p);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- internal::scalar_cast_op<int, T> conv;
- return conv(0);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(initialize());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- return accum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return vaccum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- internal::scalar_sum_op<T> sum_op;
- return sum_op(saccum, predux(vaccum));
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<SumReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = PacketType<T, Device>::HasAdd
- };
-};
-
-
-template <typename T> struct MeanReducer
-{
- static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
- static const bool IsStateful = true;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- MeanReducer() : scalarCount_(0), packetCount_(0) { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
- internal::scalar_sum_op<T> sum_op;
- *accum = sum_op(*accum, t);
- scalarCount_++;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
- (*accum) = padd<Packet>(*accum, p);
- packetCount_++;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- internal::scalar_cast_op<int, T> conv;
- return conv(0);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(initialize());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- return accum / scalarCount_;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return pdiv(vaccum, pset1<Packet>(packetCount_));
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- internal::scalar_sum_op<T> sum_op;
- return sum_op(saccum, predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
- }
-
- protected:
- DenseIndex scalarCount_;
- DenseIndex packetCount_;
-};
-
-template <typename T, typename Device>
-struct reducer_traits<MeanReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = PacketType<T, Device>::HasAdd
- };
-};
-
-
-template <typename T, bool IsMax = true, bool IsInteger = true>
-struct MinMaxBottomValue {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
- return Eigen::NumTraits<T>::lowest();
- }
-};
-template <typename T>
-struct MinMaxBottomValue<T, true, false> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
- return -Eigen::NumTraits<T>::infinity();
- }
-};
-template <typename T>
-struct MinMaxBottomValue<T, false, true> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
- return Eigen::NumTraits<T>::highest();
- }
-};
-template <typename T>
-struct MinMaxBottomValue<T, false, false> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() {
- return Eigen::NumTraits<T>::infinity();
- }
-};
-
-
-template <typename T> struct MaxReducer
-{
- static const bool PacketAccess = packet_traits<T>::HasMax;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- if (t > *accum) { *accum = t; }
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
- (*accum) = pmax<Packet>(*accum, p);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value();
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(initialize());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- return accum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return vaccum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- return numext::maxi(saccum, predux_max(vaccum));
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<MaxReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = PacketType<T, Device>::HasMax
- };
-};
-
-
-template <typename T> struct MinReducer
-{
- static const bool PacketAccess = packet_traits<T>::HasMin;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- if (t < *accum) { *accum = t; }
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
- (*accum) = pmin<Packet>(*accum, p);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value();
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(initialize());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- return accum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return vaccum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- return numext::mini(saccum, predux_min(vaccum));
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<MinReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = PacketType<T, Device>::HasMin
- };
-};
-
-
-template <typename T> struct ProdReducer
-{
- static const bool PacketAccess = packet_traits<T>::HasMul;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- internal::scalar_product_op<T> prod_op;
- (*accum) = prod_op(*accum, t);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
- (*accum) = pmul<Packet>(*accum, p);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- internal::scalar_cast_op<int, T> conv;
- return conv(1);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(initialize());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- return accum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return vaccum;
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- internal::scalar_product_op<T> prod_op;
- return prod_op(saccum, predux_mul(vaccum));
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<ProdReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::MulCost,
- PacketAccess = PacketType<T, Device>::HasMul
- };
-};
-
-
-struct AndReducer
-{
- static const bool PacketAccess = false;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
- *accum = *accum && t;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
- return accum;
- }
-};
-
-template <typename Device>
-struct reducer_traits<AndReducer, Device> {
- enum {
- Cost = 1,
- PacketAccess = false
- };
-};
-
-
-struct OrReducer {
- static const bool PacketAccess = false;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
- *accum = *accum || t;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const {
- return false;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const {
- return accum;
- }
-};
-
-template <typename Device>
-struct reducer_traits<OrReducer, Device> {
- enum {
- Cost = 1,
- PacketAccess = false
- };
-};
-
-
-// Argmin/Argmax reducers
-template <typename T> struct ArgMaxTupleReducer
-{
- static const bool PacketAccess = false;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
- if (t.second > accum->second) { *accum = t; }
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return T(0, NumTraits<typename T::second_type>::lowest());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
- return accum;
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = false
- };
-};
-
-
-template <typename T> struct ArgMinTupleReducer
-{
- static const bool PacketAccess = false;
- static const bool IsStateful = false;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
- if (t.second < accum->second) { *accum = t; }
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return T(0, NumTraits<typename T::second_type>::highest());
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
- return accum;
- }
-};
-
-template <typename T, typename Device>
-struct reducer_traits<ArgMinTupleReducer<T>, Device> {
- enum {
- Cost = NumTraits<T>::AddCost,
- PacketAccess = false
- };
-};
-
-
-template <typename T, typename Index, size_t NumDims>
-class GaussianGenerator {
- public:
- static const bool PacketAccess = false;
-
- EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
- const array<T, NumDims>& std_devs)
- : m_means(means)
- {
- for (size_t i = 0; i < NumDims; ++i) {
- m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
- }
- }
-
- EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
- T tmp = T(0);
- for (size_t i = 0; i < NumDims; ++i) {
- T offset = coordinates[i] - m_means[i];
- tmp += offset * offset / m_two_sigmas[i];
- }
- return numext::exp(-tmp);
- }
-
- private:
- array<T, NumDims> m_means;
- array<T, NumDims> m_two_sigmas;
-};
-
-template <typename T, typename Index, size_t NumDims>
-struct functor_traits<GaussianGenerator<T, Index, NumDims> > {
- enum {
- Cost = NumDims * (2 * NumTraits<T>::AddCost + NumTraits<T>::MulCost +
- functor_traits<scalar_quotient_op<T, T> >::Cost) +
- functor_traits<scalar_exp_op<T> >::Cost,
- PacketAccess = GaussianGenerator<T, Index, NumDims>::PacketAccess
- };
-};
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
deleted file mode 100644
index e27753b..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ /dev/null
@@ -1,185 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
-#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
-
-namespace Eigen {
-
-/** \class TensorGeneratorOp
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor generator class.
- *
- *
- */
-namespace internal {
-template<typename Generator, typename XprType>
-struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename Generator, typename XprType>
-struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense>
-{
- typedef const TensorGeneratorOp<Generator, XprType>& type;
-};
-
-template<typename Generator, typename XprType>
-struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type>
-{
- typedef TensorGeneratorOp<Generator, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename Generator, typename XprType>
-class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator)
- : m_xpr(expr), m_generator(generator) {}
-
- EIGEN_DEVICE_FUNC
- const Generator& generator() const { return m_generator; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const Generator m_generator;
-};
-
-
-// Eval as rvalue
-template<typename Generator, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
-{
- typedef TensorGeneratorOp<Generator, ArgType> XprType;
- typedef typename XprType::Index Index;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
- static const int NumDims = internal::array_size<Dimensions>::value;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- enum {
- IsAligned = false,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_generator(op.generator())
- {
- TensorEvaluator<ArgType, Device> impl(op.expression(), device);
- m_dimensions = impl.dimensions();
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
- }
- } else {
- m_strides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- array<Index, NumDims> coords;
- extract_coordinates(index, coords);
- return m_generator(coords);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
- EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+packetSize-1 < dimensions().TotalSize());
-
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
- for (int i = 0; i < packetSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool) const {
- // TODO(rmlarsen): This is just a placeholder. Define interface to make
- // generators return their cost.
- return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
- TensorOpCost::MulCost<Scalar>());
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_strides[i];
- index -= idx * m_strides[i];
- coords[i] = idx;
- }
- coords[0] = index;
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_strides[i];
- index -= idx * m_strides[i];
- coords[i] = idx;
- }
- coords[NumDims-1] = index;
- }
- }
-
- Dimensions m_dimensions;
- array<Index, NumDims> m_strides;
- Generator m_generator;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
deleted file mode 100644
index 665b861..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
-
-namespace Eigen {
-
-/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors.
- *
- * This function computes the regularized incomplete beta function (integral).
- *
- */
-template <typename ADerived, typename BDerived, typename XDerived>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const
- TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>,
- const ADerived, const BDerived, const XDerived>
- betainc(const ADerived& a, const BDerived& b, const XDerived& x) {
- return TensorCwiseTernaryOp<
- internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived,
- const BDerived, const XDerived>(
- a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>());
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
deleted file mode 100644
index a901c5d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ /dev/null
@@ -1,79 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H
-#define EIGEN_CXX11_TENSOR_TENSOR_IO_H
-
-namespace Eigen {
-
-namespace internal {
-
-// Print the tensor as a 2d matrix
-template <typename Tensor, int Rank>
-struct TensorPrinter {
- static void run (std::ostream& os, const Tensor& tensor) {
- typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
- typedef typename Tensor::Index Index;
- const Index total_size = internal::array_prod(tensor.dimensions());
- if (total_size > 0) {
- const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
- static const int layout = Tensor::Layout;
- Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
- os << matrix;
- }
- }
-};
-
-
-// Print the tensor as a vector
-template <typename Tensor>
-struct TensorPrinter<Tensor, 1> {
- static void run (std::ostream& os, const Tensor& tensor) {
- typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
- typedef typename Tensor::Index Index;
- const Index total_size = internal::array_prod(tensor.dimensions());
- if (total_size > 0) {
- Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
- os << array;
- }
- }
-};
-
-
-// Print the tensor as a scalar
-template <typename Tensor>
-struct TensorPrinter<Tensor, 0> {
- static void run (std::ostream& os, const Tensor& tensor) {
- os << tensor.coeff(0);
- }
-};
-}
-
-template <typename T>
-std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
- typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
- typedef typename Evaluator::Dimensions Dimensions;
-
- // Evaluate the expression if needed
- TensorForcedEvalOp<const T> eval = expr.eval();
- Evaluator tensor(eval, DefaultDevice());
- tensor.evalSubExprsIfNeeded(NULL);
-
- // Print the result
- static const int rank = internal::array_size<Dimensions>::value;
- internal::TensorPrinter<Evaluator, rank>::run(os, tensor);
-
- // Cleanup.
- tensor.cleanup();
- return os;
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
deleted file mode 100644
index 566856e..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ /dev/null
@@ -1,509 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorImagePatch
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Patch extraction specialized for image processing.
- * This assumes that the input has a least 3 dimensions ordered as follow:
- * 1st dimension: channels (of size d)
- * 2nd dimension: rows (of size r)
- * 3rd dimension: columns (of size c)
- * There can be additional dimensions such as time (for video) or batch (for
- * bulk processing after the first 3.
- * Calling the image patch code with patch_rows and patch_cols is equivalent
- * to calling the regular patch extraction code with parameters d, patch_rows,
- * patch_cols, and 1 for all the additional dimensions.
- */
-namespace internal {
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType>
-{
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions + 1;
- static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense>
-{
- typedef const TensorImagePatchOp<Rows, Cols, XprType>& type;
-};
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type>
-{
- typedef TensorImagePatchOp<Rows, Cols, XprType> type;
-};
-
-} // end namespace internal
-
-template<DenseIndex Rows, DenseIndex Cols, typename XprType>
-class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorImagePatchOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
- DenseIndex row_strides, DenseIndex col_strides,
- DenseIndex in_row_strides, DenseIndex in_col_strides,
- DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
- PaddingType padding_type, Scalar padding_value)
- : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
- m_row_strides(row_strides), m_col_strides(col_strides),
- m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
- m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
- m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
- m_padding_type(padding_type), m_padding_value(padding_value) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
- DenseIndex row_strides, DenseIndex col_strides,
- DenseIndex in_row_strides, DenseIndex in_col_strides,
- DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
- DenseIndex padding_top, DenseIndex padding_bottom,
- DenseIndex padding_left, DenseIndex padding_right,
- Scalar padding_value)
- : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
- m_row_strides(row_strides), m_col_strides(col_strides),
- m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
- m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
- m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
- m_padding_left(padding_left), m_padding_right(padding_right),
- m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
-
- EIGEN_DEVICE_FUNC
- DenseIndex patch_rows() const { return m_patch_rows; }
- EIGEN_DEVICE_FUNC
- DenseIndex patch_cols() const { return m_patch_cols; }
- EIGEN_DEVICE_FUNC
- DenseIndex row_strides() const { return m_row_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex col_strides() const { return m_col_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex in_row_strides() const { return m_in_row_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex in_col_strides() const { return m_in_col_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
- EIGEN_DEVICE_FUNC
- bool padding_explicit() const { return m_padding_explicit; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_top() const { return m_padding_top; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_bottom() const { return m_padding_bottom; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_left() const { return m_padding_left; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_right() const { return m_padding_right; }
- EIGEN_DEVICE_FUNC
- PaddingType padding_type() const { return m_padding_type; }
- EIGEN_DEVICE_FUNC
- Scalar padding_value() const { return m_padding_value; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const DenseIndex m_patch_rows;
- const DenseIndex m_patch_cols;
- const DenseIndex m_row_strides;
- const DenseIndex m_col_strides;
- const DenseIndex m_in_row_strides;
- const DenseIndex m_in_col_strides;
- const DenseIndex m_row_inflate_strides;
- const DenseIndex m_col_inflate_strides;
- const bool m_padding_explicit;
- const DenseIndex m_padding_top;
- const DenseIndex m_padding_bottom;
- const DenseIndex m_padding_left;
- const DenseIndex m_padding_right;
- const PaddingType m_padding_type;
- const Scalar m_padding_value;
-};
-
-// Eval as rvalue
-template<DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
-{
- typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- static const int NumDims = NumInputDims + 1;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>,
- Device> Self;
- typedef TensorEvaluator<ArgType, Device> Impl;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- m_paddingValue = op.padding_value();
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-
- // Caches a few variables.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputDepth = input_dims[0];
- m_inputRows = input_dims[1];
- m_inputCols = input_dims[2];
- } else {
- m_inputDepth = input_dims[NumInputDims-1];
- m_inputRows = input_dims[NumInputDims-2];
- m_inputCols = input_dims[NumInputDims-3];
- }
-
- m_row_strides = op.row_strides();
- m_col_strides = op.col_strides();
-
- // Input strides and effective input/patch size
- m_in_row_strides = op.in_row_strides();
- m_in_col_strides = op.in_col_strides();
- m_row_inflate_strides = op.row_inflate_strides();
- m_col_inflate_strides = op.col_inflate_strides();
- // The "effective" input rows and input cols are the input rows and cols
- // after inflating them with zeros.
- // For examples, a 2x3 matrix with row_inflate_strides and
- // col_inflate_strides of 2 comes from:
- // A B C
- // D E F
- //
- // to a matrix is 3 x 5:
- //
- // A . B . C
- // . . . . .
- // D . E . F
-
- m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
- m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
- m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
- m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
-
- if (op.padding_explicit()) {
- m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
- m_rowPaddingTop = op.padding_top();
- m_colPaddingLeft = op.padding_left();
- } else {
- // Computing padding from the type
- switch (op.padding_type()) {
- case PADDING_VALID:
- m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
- // Calculate the padding
- m_rowPaddingTop = numext::maxi<Index>(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2);
- m_colPaddingLeft = numext::maxi<Index>(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2);
- break;
- case PADDING_SAME:
- m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
- // Calculate the padding
- m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2;
- m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2;
- break;
- default:
- eigen_assert(false && "unexpected padding");
- }
- }
- eigen_assert(m_outputRows > 0);
- eigen_assert(m_outputCols > 0);
-
- // Dimensions for result of extraction.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- // ColMajor
- // 0: depth
- // 1: patch_rows
- // 2: patch_cols
- // 3: number of patches
- // 4 and beyond: anything else (such as batch).
- m_dimensions[0] = input_dims[0];
- m_dimensions[1] = op.patch_rows();
- m_dimensions[2] = op.patch_cols();
- m_dimensions[3] = m_outputRows * m_outputCols;
- for (int i = 4; i < NumDims; ++i) {
- m_dimensions[i] = input_dims[i-1];
- }
- } else {
- // RowMajor
- // NumDims-1: depth
- // NumDims-2: patch_rows
- // NumDims-3: patch_cols
- // NumDims-4: number of patches
- // NumDims-5 and beyond: anything else (such as batch).
- m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
- m_dimensions[NumDims-2] = op.patch_rows();
- m_dimensions[NumDims-3] = op.patch_cols();
- m_dimensions[NumDims-4] = m_outputRows * m_outputCols;
- for (int i = NumDims-5; i >= 0; --i) {
- m_dimensions[i] = input_dims[i];
- }
- }
-
- // Strides for moving the patch in various dimensions.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_colStride = m_dimensions[1];
- m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0];
- m_otherStride = m_patchStride * m_dimensions[3];
- } else {
- m_colStride = m_dimensions[NumDims-2];
- m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1];
- m_otherStride = m_patchStride * m_dimensions[NumDims-4];
- }
-
- // Strides for navigating through the input tensor.
- m_rowInputStride = m_inputDepth;
- m_colInputStride = m_inputDepth * m_inputRows;
- m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols;
-
- // Fast representations of different variables.
- m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
- m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
- m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
- m_fastInflateRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
- m_fastInflateColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
- m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
-
- // Number of patches in the width dimension.
- m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows);
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
- } else {
- m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- // Patch index corresponding to the passed in index.
- const Index patchIndex = index / m_fastPatchStride;
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
-
- // Other ways to index this element.
- const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride;
- const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
-
- // Calculate col index in the input original tensor.
- const Index colIndex = patch2DIndex / m_fastOutputRows;
- const Index colOffset = patchOffset / m_fastColStride;
- const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
- const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0);
- if (inputCol < 0 || inputCol >= m_input_cols_eff ||
- ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
- return Scalar(m_paddingValue);
- }
-
- // Calculate row index in the original input tensor.
- const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
- const Index rowOffset = patchOffset - colOffset * m_colStride;
- const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
- const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0);
- if (inputRow < 0 || inputRow >= m_input_rows_eff ||
- ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
- return Scalar(m_paddingValue);
- }
-
- const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
- const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-
- const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride;
- return m_impl.coeff(inputIndex);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {
- return packetWithPossibleZero(index);
- }
-
- const Index indices[2] = {index, index + PacketSize - 1};
- const Index patchIndex = indices[0] / m_fastPatchStride;
- if (patchIndex != indices[1] / m_fastPatchStride) {
- return packetWithPossibleZero(index);
- }
- const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride;
- eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
-
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
- (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
-
- const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
- eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
-
- const Index colIndex = patch2DIndex / m_fastOutputRows;
- const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride};
-
- // Calculate col indices in the original input tensor.
- const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] -
- m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
- if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
- return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
- }
-
- if (inputCols[0] == inputCols[1]) {
- const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
- const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride};
- eigen_assert(rowOffsets[0] <= rowOffsets[1]);
- // Calculate col indices in the original input tensor.
- const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] -
- m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
-
- if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
- return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
- }
-
- if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) {
- // no padding
- const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
- const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
- const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride;
- return m_impl.template packet<Unaligned>(inputIndex);
- }
- }
-
- return packetWithPossibleZero(index);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- Index rowPaddingTop() const { return m_rowPaddingTop; }
- Index colPaddingLeft() const { return m_colPaddingLeft; }
- Index outputRows() const { return m_outputRows; }
- Index outputCols() const { return m_outputCols; }
- Index userRowStride() const { return m_row_strides; }
- Index userColStride() const { return m_col_strides; }
- Index userInRowStride() const { return m_in_row_strides; }
- Index userInColStride() const { return m_in_col_strides; }
- Index rowInflateStride() const { return m_row_inflate_strides; }
- Index colInflateStride() const { return m_col_inflate_strides; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- // We conservatively estimate the cost for the code path where the computed
- // index is inside the original image and
- // TensorEvaluator<ArgType, Device>::CoordAccess is false.
- const double compute_cost = 3 * TensorOpCost::DivCost<Index>() +
- 6 * TensorOpCost::MulCost<Index>() +
- 8 * TensorOpCost::MulCost<Index>();
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
- {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- Dimensions m_dimensions;
-
- Index m_otherStride;
- Index m_patchStride;
- Index m_colStride;
- Index m_row_strides;
- Index m_col_strides;
-
- Index m_in_row_strides;
- Index m_in_col_strides;
- Index m_row_inflate_strides;
- Index m_col_inflate_strides;
-
- Index m_input_rows_eff;
- Index m_input_cols_eff;
- Index m_patch_rows_eff;
- Index m_patch_cols_eff;
-
- internal::TensorIntDivisor<Index> m_fastOtherStride;
- internal::TensorIntDivisor<Index> m_fastPatchStride;
- internal::TensorIntDivisor<Index> m_fastColStride;
- internal::TensorIntDivisor<Index> m_fastInflateRowStride;
- internal::TensorIntDivisor<Index> m_fastInflateColStride;
- internal::TensorIntDivisor<Index> m_fastInputColsEff;
-
- Index m_rowInputStride;
- Index m_colInputStride;
- Index m_patchInputStride;
-
- Index m_inputDepth;
- Index m_inputRows;
- Index m_inputCols;
-
- Index m_outputRows;
- Index m_outputCols;
-
- Index m_rowPaddingTop;
- Index m_colPaddingLeft;
-
- internal::TensorIntDivisor<Index> m_fastOutputRows;
- internal::TensorIntDivisor<Index> m_fastOutputDepth;
-
- Scalar m_paddingValue;
-
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
deleted file mode 100644
index 3209fec..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h
+++ /dev/null
@@ -1,725 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
-
-
-#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
-
-#define EIGEN_HAS_INDEX_LIST
-
-namespace Eigen {
-
-/** \internal
- *
- * \class TensorIndexList
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Set of classes used to encode a set of Tensor dimensions/indices.
- *
- * The indices in the list can be known at compile time or at runtime. A mix
- * of static and dynamic indices can also be provided if needed. The tensor
- * code will attempt to take advantage of the indices that are known at
- * compile time to optimize the code it generates.
- *
- * This functionality requires a c++11 compliant compiler. If your compiler
- * is older you need to use arrays of indices instead.
- *
- * Several examples are provided in the cxx11_tensor_index_list.cpp file.
- *
- * \sa Tensor
- */
-
-template <DenseIndex n>
-struct type2index {
- static const DenseIndex value = n;
- EIGEN_DEVICE_FUNC constexpr operator DenseIndex() const { return n; }
- EIGEN_DEVICE_FUNC void set(DenseIndex val) {
- eigen_assert(val == n);
- }
-};
-
-// This can be used with IndexPairList to get compile-time constant pairs,
-// such as IndexPairList<type2indexpair<1,2>, type2indexpair<3,4>>().
-template <DenseIndex f, DenseIndex s>
-struct type2indexpair {
- static const DenseIndex first = f;
- static const DenseIndex second = s;
-
- constexpr EIGEN_DEVICE_FUNC operator IndexPair<DenseIndex>() const {
- return IndexPair<DenseIndex>(f, s);
- }
-
- EIGEN_DEVICE_FUNC void set(const IndexPair<DenseIndex>& val) {
- eigen_assert(val.first == f);
- eigen_assert(val.second == s);
- }
-};
-
-
-template<DenseIndex n> struct NumTraits<type2index<n> >
-{
- typedef DenseIndex Real;
- enum {
- IsComplex = 0,
- RequireInitialization = false,
- ReadCost = 1,
- AddCost = 1,
- MulCost = 1
- };
-
- EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; }
- EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; }
- EIGEN_DEVICE_FUNC static inline Real highest() { return n; }
- EIGEN_DEVICE_FUNC static inline Real lowest() { return n; }
-};
-
-namespace internal {
-template <typename T>
-EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) {
- val = new_val;
-}
-template <DenseIndex n>
-EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) {
- val.set(new_val);
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair<DenseIndex> new_val) {
- val = new_val;
-}
-template <DenseIndex f, DenseIndex s>
-EIGEN_DEVICE_FUNC void update_value(type2indexpair<f, s>& val, IndexPair<DenseIndex> new_val) {
- val.set(new_val);
-}
-
-
-template <typename T>
-struct is_compile_time_constant {
- static constexpr bool value = false;
-};
-
-template <DenseIndex idx>
-struct is_compile_time_constant<type2index<idx> > {
- static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<const type2index<idx> > {
- static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<type2index<idx>& > {
- static constexpr bool value = true;
-};
-template <DenseIndex idx>
-struct is_compile_time_constant<const type2index<idx>& > {
- static constexpr bool value = true;
-};
-
-template <DenseIndex f, DenseIndex s>
-struct is_compile_time_constant<type2indexpair<f, s> > {
- static constexpr bool value = true;
-};
-template <DenseIndex f, DenseIndex s>
-struct is_compile_time_constant<const type2indexpair<f, s> > {
- static constexpr bool value = true;
-};
-template <DenseIndex f, DenseIndex s>
-struct is_compile_time_constant<type2indexpair<f, s>& > {
- static constexpr bool value = true;
-};
-template <DenseIndex f, DenseIndex s>
-struct is_compile_time_constant<const type2indexpair<f, s>& > {
- static constexpr bool value = true;
-};
-
-
-template<typename... T>
-struct IndexTuple;
-
-template<typename T, typename... O>
-struct IndexTuple<T, O...> {
- EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { }
- EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) { }
-
- constexpr static int count = 1 + sizeof...(O);
- T head;
- IndexTuple<O...> others;
- typedef T Head;
- typedef IndexTuple<O...> Other;
-};
-
-template<typename T>
- struct IndexTuple<T> {
- EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { }
- EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { }
-
- constexpr static int count = 1;
- T head;
- typedef T Head;
-};
-
-
-template<int N, typename... T>
-struct IndexTupleExtractor;
-
-template<int N, typename T, typename... O>
-struct IndexTupleExtractor<N, T, O...> {
-
- typedef typename IndexTupleExtractor<N-1, O...>::ValType ValType;
-
- EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) {
- return IndexTupleExtractor<N-1, O...>::get_val(val.others);
- }
-
- EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) {
- return IndexTupleExtractor<N-1, O...>::get_val(val.others);
- }
- template <typename V>
- EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) {
- IndexTupleExtractor<N-1, O...>::set_val(val.others, new_val);
- }
-
-};
-
-template<typename T, typename... O>
- struct IndexTupleExtractor<0, T, O...> {
-
- typedef T ValType;
-
- EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple<T, O...>& val) {
- return val.head;
- }
- EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple<T, O...>& val) {
- return val.head;
- }
- template <typename V>
- EIGEN_DEVICE_FUNC static void set_val(IndexTuple<T, O...>& val, V& new_val) {
- val.head = new_val;
- }
-};
-
-
-
-template <int N, typename T, typename... O>
-EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor<N, T, O...>::ValType& array_get(IndexTuple<T, O...>& tuple) {
- return IndexTupleExtractor<N, T, O...>::get_val(tuple);
-}
-template <int N, typename T, typename... O>
-EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor<N, T, O...>::ValType& array_get(const IndexTuple<T, O...>& tuple) {
- return IndexTupleExtractor<N, T, O...>::get_val(tuple);
-}
-template <typename T, typename... O>
- struct array_size<IndexTuple<T, O...> > {
- static const size_t value = IndexTuple<T, O...>::count;
-};
-template <typename T, typename... O>
- struct array_size<const IndexTuple<T, O...> > {
- static const size_t value = IndexTuple<T, O...>::count;
-};
-
-
-
-
-template <DenseIndex Idx, typename ValueT>
-struct tuple_coeff {
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple<T...>& t) {
- // return array_get<Idx>(t) * (i == Idx) + tuple_coeff<Idx-1>::get(i, t) * (i != Idx);
- return (i == Idx ? array_get<Idx>(t) : tuple_coeff<Idx-1, ValueT>::get(i, t));
- }
- template <typename... T>
- EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT& value) {
- if (i == Idx) {
- update_value(array_get<Idx>(t), value);
- } else {
- tuple_coeff<Idx-1, ValueT>::set(i, t, value);
- }
- }
-
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>& t) {
- return ((i == Idx) & is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value) ||
- tuple_coeff<Idx-1, ValueT>::value_known_statically(i, t);
- }
-
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>& t) {
- return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value &&
- tuple_coeff<Idx-1, ValueT>::values_up_to_known_statically(t);
- }
-
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>& t) {
- return is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value &&
- is_compile_time_constant<typename IndexTupleExtractor<Idx, T...>::ValType>::value &&
- array_get<Idx>(t) > array_get<Idx-1>(t) &&
- tuple_coeff<Idx-1, ValueT>::values_up_to_statically_known_to_increase(t);
- }
-};
-
-template <typename ValueT>
-struct tuple_coeff<0, ValueT> {
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple<T...>& t) {
- // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr
- return array_get<0>(t)/* * (i == 0)*/;
- }
- template <typename... T>
- EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple<T...>& t, const ValueT value) {
- eigen_assert (i == 0);
- update_value(array_get<0>(t), value);
- }
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple<T...>&) {
- return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value & (i == 0);
- }
-
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple<T...>&) {
- return is_compile_time_constant<typename IndexTupleExtractor<0, T...>::ValType>::value;
- }
-
- template <typename... T>
- EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple<T...>&) {
- return true;
- }
-};
-} // namespace internal
-
-
-
-template<typename FirstType, typename... OtherTypes>
-struct IndexList : internal::IndexTuple<FirstType, OtherTypes...> {
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this);
- }
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::get(i, *this);
- }
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::set(i, *this, value);
- }
-
- EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { }
- EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple<FirstType, OtherTypes...>(first, other...) { }
- EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple<FirstType, OtherTypes...>() { }
-
- EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this);
- }
- EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_known_statically(*this);
- }
-
- EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this);
- }
-};
-
-
-template<typename FirstType, typename... OtherTypes>
-constexpr IndexList<FirstType, OtherTypes...> make_index_list(FirstType val1, OtherTypes... other_vals) {
- return IndexList<FirstType, OtherTypes...>(val1, other_vals...);
-}
-
-
-template<typename FirstType, typename... OtherTypes>
-struct IndexPairList : internal::IndexTuple<FirstType, OtherTypes...> {
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair<DenseIndex> operator[] (const DenseIndex i) const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, IndexPair<DenseIndex>>::get(i, *this);
- }
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair<DenseIndex> value) {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...>>::value-1, IndexPair<DenseIndex> >::set(i, *this, value);
- }
-
- EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple<FirstType, OtherTypes...>& other) : internal::IndexTuple<FirstType, OtherTypes...>(other) { }
- EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple<FirstType, OtherTypes...>() { }
-
- EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const {
- return internal::tuple_coeff<internal::array_size<internal::IndexTuple<FirstType, OtherTypes...> >::value-1, DenseIndex>::value_known_statically(i, *this);
- }
-};
-
-namespace internal {
-
-template<typename FirstType, typename... OtherTypes> size_t array_prod(const IndexList<FirstType, OtherTypes...>& sizes) {
- size_t result = 1;
- for (int i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) {
- result *= sizes[i];
- }
- return result;
-}
-
-template<typename FirstType, typename... OtherTypes> struct array_size<IndexList<FirstType, OtherTypes...> > {
- static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value;
-};
-template<typename FirstType, typename... OtherTypes> struct array_size<const IndexList<FirstType, OtherTypes...> > {
- static const size_t value = array_size<IndexTuple<FirstType, OtherTypes...> >::value;
-};
-
-template<typename FirstType, typename... OtherTypes> struct array_size<IndexPairList<FirstType, OtherTypes...> > {
- static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
-};
-template<typename FirstType, typename... OtherTypes> struct array_size<const IndexPairList<FirstType, OtherTypes...> > {
- static const size_t value = std::tuple_size<std::tuple<FirstType, OtherTypes...> >::value;
-};
-
-template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList<FirstType, OtherTypes...>& a) {
- return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a);
-}
-template<DenseIndex N, typename FirstType, typename... OtherTypes> EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(const IndexList<FirstType, OtherTypes...>& a) {
- return IndexTupleExtractor<N, FirstType, OtherTypes...>::get_val(a);
-}
-
-template <typename T>
-struct index_known_statically_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_known_statically_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_known_statically_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i);
- }
-};
-
-
-template <typename T>
-struct all_indices_known_statically_impl {
- static constexpr bool run() {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct all_indices_known_statically_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return IndexList<FirstType, OtherTypes...>().all_values_known_statically();
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct all_indices_known_statically_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return IndexList<FirstType, OtherTypes...>().all_values_known_statically();
- }
-};
-
-
-template <typename T>
-struct indices_statically_known_to_increase_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
- struct indices_statically_known_to_increase_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase();
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
- struct indices_statically_known_to_increase_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run() {
- return Eigen::IndexList<FirstType, OtherTypes...>().values_statically_known_to_increase();
- }
-};
-
-
-template <typename Tx>
-struct index_statically_eq_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_eq_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) == value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_eq_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) == value);
- }
-};
-
-
-template <typename T>
-struct index_statically_ne_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_ne_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) != value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_ne_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) != value);
- }
-};
-
-
-template <typename T>
-struct index_statically_gt_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_gt_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) > value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_gt_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) > value);
- }
-};
-
-
-
-template <typename T>
-struct index_statically_lt_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_lt_impl<IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) < value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_statically_lt_impl<const IndexList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexList<FirstType, OtherTypes...>().get(i) < value);
- }
-};
-
-
-
-template <typename Tx>
-struct index_pair_first_statically_eq_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_pair_first_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_pair_first_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexPairList<FirstType, OtherTypes...>().operator[](i).first == value);
- }
-};
-
-
-
-template <typename Tx>
-struct index_pair_second_statically_eq_impl {
- EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_pair_second_statically_eq_impl<IndexPairList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
- }
-};
-
-template <typename FirstType, typename... OtherTypes>
-struct index_pair_second_statically_eq_impl<const IndexPairList<FirstType, OtherTypes...> > {
- EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) {
- return IndexPairList<FirstType, OtherTypes...>().value_known_statically(i) &
- (IndexPairList<FirstType, OtherTypes...>().operator[](i).second == value);
- }
-};
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#else
-
-namespace Eigen {
-namespace internal {
-
-template <typename T>
-struct index_known_statically_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) {
- return false;
- }
-};
-
-template <typename T>
-struct all_indices_known_statically_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
- return false;
- }
-};
-
-template <typename T>
-struct indices_statically_known_to_increase_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() {
- return false;
- }
-};
-
-template <typename T>
-struct index_statically_eq_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename T>
-struct index_statically_ne_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename T>
-struct index_statically_gt_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename T>
-struct index_statically_lt_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename Tx>
-struct index_pair_first_statically_eq_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-template <typename Tx>
-struct index_pair_second_statically_eq_impl {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) {
- return false;
- }
-};
-
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif
-
-
-namespace Eigen {
-namespace internal {
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) {
- return index_known_statically_impl<T>::run(i);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() {
- return all_indices_known_statically_impl<T>::run();
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() {
- return indices_statically_known_to_increase_impl<T>::run();
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) {
- return index_statically_eq_impl<T>::run(i, value);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) {
- return index_statically_ne_impl<T>::run(i, value);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) {
- return index_statically_gt_impl<T>::run(i, value);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) {
- return index_statically_lt_impl<T>::run(i, value);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) {
- return index_pair_first_statically_eq_impl<T>::run(i, value);
-}
-
-template <typename T>
-static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) {
- return index_pair_second_statically_eq_impl<T>::run(i, value);
-}
-
-} // end namespace internal
-} // end namespace Eigen
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
deleted file mode 100644
index f391fb9..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
+++ /dev/null
@@ -1,229 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
-
-namespace Eigen {
-
-/** \class TensorInflation
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor inflation class.
- *
- *
- */
-namespace internal {
-template<typename Strides, typename XprType>
-struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename Strides, typename XprType>
-struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense>
-{
- typedef const TensorInflationOp<Strides, XprType>& type;
-};
-
-template<typename Strides, typename XprType>
-struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type>
-{
- typedef TensorInflationOp<Strides, XprType> type;
-};
-
-} // end namespace internal
-
-template<typename Strides, typename XprType>
-class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides)
- : m_xpr(expr), m_strides(strides) {}
-
- EIGEN_DEVICE_FUNC
- const Strides& strides() const { return m_strides; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const Strides m_strides;
-};
-
-// Eval as rvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
-{
- typedef TensorInflationOp<Strides, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_strides(op.strides())
- {
- m_dimensions = m_impl.dimensions();
- // Expand each dimension to the inflated dimension.
- for (int i = 0; i < NumDims; ++i) {
- m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1;
- }
-
- // Remember the strides for fast division.
- for (int i = 0; i < NumDims; ++i) {
- m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]);
- }
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- m_inputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- }
- } else { // RowMajor
- m_outputStrides[NumDims-1] = 1;
- m_inputStrides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- // Computes the input index given the output index. Returns true if the output
- // index doesn't fall into a hole.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const
- {
- eigen_assert(index < dimensions().TotalSize());
- *inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
- if (idx != idx / m_fastStrides[i] * m_strides[i]) {
- return false;
- }
- *inputIndex += idx / m_strides[i] * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- if (index != index / m_fastStrides[0] * m_strides[0]) {
- return false;
- }
- *inputIndex += index / m_strides[0];
- return true;
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStrides[i];
- if (idx != idx / m_fastStrides[i] * m_strides[i]) {
- return false;
- }
- *inputIndex += idx / m_strides[i] * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) {
- return false;
- }
- *inputIndex += index / m_strides[NumDims - 1];
- }
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- Index inputIndex = 0;
- if (getInputIndex(index, &inputIndex)) {
- return m_impl.coeff(inputIndex);
- } else {
- return Scalar(0);
- }
- }
-
- // TODO(yangke): optimize this function so that we can detect and produce
- // all-zero packets
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() +
- 3 * TensorOpCost::MulCost<Index>() +
- 2 * TensorOpCost::AddCost<Index>());
- const double input_size = m_impl.dimensions().TotalSize();
- const double output_size = m_dimensions.TotalSize();
- if (output_size == 0)
- return TensorOpCost();
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0,
- compute_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
- const Strides m_strides;
- array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h
deleted file mode 100644
index 33edc49..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
-
-#include <initializer_list>
-
-namespace Eigen {
-
-/** \class TensorInitializer
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Helper template to initialize Tensors from std::initializer_lists.
- */
-namespace internal {
-
-template <typename Derived, int N>
-struct Initializer {
- typedef std::initializer_list<
- typename Initializer<Derived, N - 1>::InitList> InitList;
-
- static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
- Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices,
- const InitList& vals) {
- int i = 0;
- for (auto v : vals) {
- (*indices)[traits<Derived>::NumDimensions - N] = i++;
- Initializer<Derived, N - 1>::run(tensor, indices, v);
- }
- }
-};
-
-template <typename Derived>
-struct Initializer<Derived, 1> {
- typedef std::initializer_list<typename traits<Derived>::Scalar> InitList;
-
- static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
- Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>* indices,
- const InitList& vals) {
- int i = 0;
- // There is likely a faster way to do that than iterating.
- for (auto v : vals) {
- (*indices)[traits<Derived>::NumDimensions - 1] = i++;
- tensor.coeffRef(*indices) = v;
- }
- }
-};
-
-template <typename Derived>
-struct Initializer<Derived, 0> {
- typedef typename traits<Derived>::Scalar InitList;
-
- static void run(TensorEvaluator<Derived, DefaultDevice>& tensor,
- Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions>*,
- const InitList& v) {
- tensor.coeffRef(0) = v;
- }
-};
-
-
-template <typename Derived, int N>
-void initialize_tensor(TensorEvaluator<Derived, DefaultDevice>& tensor,
- const typename Initializer<Derived, traits<Derived>::NumDimensions>::InitList& vals) {
- Eigen::array<typename traits<Derived>::Index, traits<Derived>::NumDimensions> indices;
- Initializer<Derived, traits<Derived>::NumDimensions>::run(tensor, &indices, vals);
-}
-
-} // namespace internal
-} // namespace Eigen
-
-#endif // EIGEN_HAS_VARIADIC_TEMPLATES
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
deleted file mode 100644
index ede3939..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ /dev/null
@@ -1,253 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
-#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
-
-
-namespace Eigen {
-
-/** \internal
- *
- * \class TensorIntDiv
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Fast integer division by a constant.
- *
- * See the paper from Granlund and Montgomery for explanation.
- * (at http://dx.doi.org/10.1145/773473.178249)
- *
- * \sa Tensor
- */
-
-namespace internal {
-
-namespace {
-
- // Note: result is undefined if val == 0
- template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
- {
-#ifdef __CUDA_ARCH__
- return __clz(val);
-#elif EIGEN_COMP_MSVC
- unsigned long index;
- _BitScanReverse(&index, val);
- return 31 - index;
-#else
- EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return __builtin_clz(static_cast<uint32_t>(val));
-#endif
- }
-
- template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
- {
-#ifdef __CUDA_ARCH__
- return __clzll(val);
-#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
- unsigned long index;
- _BitScanReverse64(&index, val);
- return 63 - index;
-#elif EIGEN_COMP_MSVC
- // MSVC's _BitScanReverse64 is not available for 32bits builds.
- unsigned int lo = (unsigned int)(val&0xffffffff);
- unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
- int n;
- if(hi==0)
- n = 32 + count_leading_zeros<unsigned int>(lo);
- else
- n = count_leading_zeros<unsigned int>(hi);
- return n;
-#else
- EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return __builtin_clzll(static_cast<uint64_t>(val));
-#endif
- }
-
- template <typename T>
- struct UnsignedTraits {
- typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type;
- };
-
- template <typename T>
- struct DividerTraits {
- typedef typename UnsignedTraits<T>::type type;
- static const int N = sizeof(T) * 8;
- };
-
- template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
-#if defined(__CUDA_ARCH__)
- return __umulhi(a, b);
-#else
- return (static_cast<uint64_t>(a) * b) >> 32;
-#endif
- }
-
- template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
-#if defined(__CUDA_ARCH__)
- return __umul64hi(a, b);
-#elif defined(__SIZEOF_INT128__)
- __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
- return static_cast<uint64_t>(v >> 64);
-#else
- return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper();
-#endif
- }
-
- template <int N, typename T>
- struct DividerHelper {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) {
- EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1);
- }
- };
-
- template <typename T>
- struct DividerHelper<64, T> {
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__)
- return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
-#else
- const uint64_t shift = 1ULL << log_div;
- TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
- - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
- + TensorUInt128<static_val<0>, static_val<1> >(1);
- return static_cast<uint64_t>(result);
-#endif
- }
- };
-}
-
-
-template <typename T, bool div_gt_one = false>
-struct TensorIntDivisor {
- public:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
- multiplier = 0;
- shift1 = 0;
- shift2 = 0;
- }
-
- // Must have 0 < divider < 2^31. This is relaxed to
- // 0 < divider < 2^63 when using 64-bit indices on platforms that support
- // the __uint128_t type.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
- const int N = DividerTraits<T>::N;
- eigen_assert(static_cast<typename UnsignedTraits<T>::type>(divider) < NumTraits<UnsignedType>::highest()/2);
- eigen_assert(divider > 0);
-
- // fast ln2
- const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider));
- int log_div = N - leading_zeros;
- // if divider is a power of two then log_div is 1 more than it should be.
- if ((static_cast<typename UnsignedTraits<T>::type>(1) << (log_div-1)) == static_cast<typename UnsignedTraits<T>::type>(divider))
- log_div--;
-
- multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider);
- shift1 = log_div > 1 ? 1 : log_div;
- shift2 = log_div > 1 ? log_div-1 : 0;
- }
-
- // Must have 0 <= numerator. On platforms that dont support the __uint128_t
- // type numerator should also be less than 2^32-1.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
- eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2);
- //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above
-
- UnsignedType t1 = muluh(multiplier, numerator);
- UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1;
- return (t1 + t) >> shift2;
- }
-
- private:
- typedef typename DividerTraits<T>::type UnsignedType;
- UnsignedType multiplier;
- int32_t shift1;
- int32_t shift2;
-};
-
-
-// Optimized version for signed 32 bit integers.
-// Derived from Hacker's Delight.
-// Only works for divisors strictly greater than one
-template <>
-class TensorIntDivisor<int32_t, true> {
- public:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
- magic = 0;
- shift = 0;
- }
- // Must have 2 <= divider
- EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) {
- eigen_assert(divider >= 2);
- calcMagic(divider);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
-#ifdef __CUDA_ARCH__
- return (__umulhi(magic, n) >> shift);
-#else
- uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
- return (static_cast<uint32_t>(v >> 32) >> shift);
-#endif
- }
-
-private:
- // Compute the magic numbers. See Hacker's Delight section 10 for an in
- // depth explanation.
- EIGEN_DEVICE_FUNC void calcMagic(int32_t d) {
- const unsigned two31 = 0x80000000; // 2**31.
- unsigned ad = d;
- unsigned t = two31 + (ad >> 31);
- unsigned anc = t - 1 - t%ad; // Absolute value of nc.
- int p = 31; // Init. p.
- unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|.
- unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|).
- unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|.
- unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|).
- unsigned delta = 0;
- do {
- p = p + 1;
- q1 = 2*q1; // Update q1 = 2**p/|nc|.
- r1 = 2*r1; // Update r1 = rem(2**p, |nc|).
- if (r1 >= anc) { // (Must be an unsigned
- q1 = q1 + 1; // comparison here).
- r1 = r1 - anc;}
- q2 = 2*q2; // Update q2 = 2**p/|d|.
- r2 = 2*r2; // Update r2 = rem(2**p, |d|).
- if (r2 >= ad) { // (Must be an unsigned
- q2 = q2 + 1; // comparison here).
- r2 = r2 - ad;}
- delta = ad - r2;
- } while (q1 < delta || (q1 == delta && r1 == 0));
-
- magic = (unsigned)(q2 + 1);
- shift = p - 32;
- }
-
- uint32_t magic;
- int32_t shift;
-};
-
-
-template <typename T, bool div_gt_one>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
- return divisor.divide(numerator);
-}
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
deleted file mode 100644
index cd0109e..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h
+++ /dev/null
@@ -1,209 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
-
-namespace Eigen {
-
-/** \class TensorLayoutSwap
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Swap the layout from col-major to row-major, or row-major
- * to col-major, and invert the order of the dimensions.
- *
- * Beware: the dimensions are reversed by this operation. If you want to
- * preserve the ordering of the dimensions, you need to combine this
- * operation with a shuffle.
- *
- * \example:
- * Tensor<float, 2, ColMajor> input(2, 4);
- * Tensor<float, 2, RowMajor> output = input.swap_layout();
- * eigen_assert(output.dimension(0) == 4);
- * eigen_assert(output.dimension(1) == 2);
- *
- * array<int, 2> shuffle(1, 0);
- * output = input.swap_layout().shuffle(shuffle);
- * eigen_assert(output.dimension(0) == 2);
- * eigen_assert(output.dimension(1) == 4);
- *
- */
-namespace internal {
-template<typename XprType>
-struct traits<TensorLayoutSwapOp<XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = traits<XprType>::NumDimensions;
- static const int Layout = (traits<XprType>::Layout == ColMajor) ? RowMajor : ColMajor;
-};
-
-template<typename XprType>
-struct eval<TensorLayoutSwapOp<XprType>, Eigen::Dense>
-{
- typedef const TensorLayoutSwapOp<XprType>& type;
-};
-
-template<typename XprType>
-struct nested<TensorLayoutSwapOp<XprType>, 1, typename eval<TensorLayoutSwapOp<XprType> >::type>
-{
- typedef TensorLayoutSwapOp<XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename XprType>
-class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorLayoutSwapOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorLayoutSwapOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr)
- : m_xpr(expr) {}
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other)
- {
- typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
-};
-
-
-// Eval as rvalue
-template<typename ArgType, typename Device>
-struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
-{
- typedef TensorLayoutSwapOp<ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
- CoordAccess = false, // to be implemented
- RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- for(int i = 0; i < NumDims; ++i) {
- m_dimensions[i] = m_impl.dimensions()[NumDims-1-i];
- }
- }
-
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- return m_impl.evalSubExprsIfNeeded(data);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(index);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_impl.template packet<LoadMode>(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return m_impl.costPerCoeff(vectorized);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); }
-
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- protected:
- TensorEvaluator<ArgType, Device> m_impl;
- Dimensions m_dimensions;
-};
-
-
-// Eval as lvalue
-template<typename ArgType, typename Device>
- struct TensorEvaluator<TensorLayoutSwapOp<ArgType>, Device>
- : public TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
-{
- typedef TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device> Base;
- typedef TensorLayoutSwapOp<ArgType> XprType;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
- CoordAccess = false // to be implemented
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(index);
- }
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- this->m_impl.template writePacket<StoreMode>(index, x);
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h
deleted file mode 100644
index ee0078b..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H
-
-
-/** use this macro in sfinae selection in templated functions
- *
- * template<typename T,
- * typename std::enable_if< isBanana<T>::value , int >::type = 0
- * >
- * void foo(){}
- *
- * becomes =>
- *
- * template<typename TopoType,
- * SFINAE_ENABLE_IF( isBanana<T>::value )
- * >
- * void foo(){}
- */
-
-// SFINAE requires variadic templates
-#ifndef __CUDACC__
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- // SFINAE doesn't work for gcc <= 4.7
- #ifdef EIGEN_COMP_GNUC
- #if EIGEN_GNUC_AT_LEAST(4,8)
- #define EIGEN_HAS_SFINAE
- #endif
- #else
- #define EIGEN_HAS_SFINAE
- #endif
-#endif
-#endif
-
-#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \
- typename internal::enable_if< ( __condition__ ) , int >::type = 0
-
-
-#if EIGEN_HAS_CONSTEXPR
-#define EIGEN_CONSTEXPR constexpr
-#else
-#define EIGEN_CONSTEXPR
-#endif
-
-
-#endif
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
deleted file mode 100644
index e4fc86a..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
+++ /dev/null
@@ -1,323 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H
-#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H
-
-namespace Eigen {
-
-// FIXME use proper doxygen documentation (e.g. \tparam MakePointer_)
-
-/** \class TensorMap
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A tensor expression mapping an existing array of data.
- *
- */
-/// `template <class> class MakePointer_` is added to convert the host pointer to the device pointer.
-/// It is added due to the fact that for our device compiler `T*` is not allowed.
-/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer `T`.
-/// This is done through our `MakePointer_` class. By default the Type in the `MakePointer_<T>` is `T*` .
-/// Therefore, by adding the default value, we managed to convert the type and it does not break any
-/// existing code as its default value is `T*`.
-template<typename PlainObjectType, int Options_, template <class> class MakePointer_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_, MakePointer_> >
-{
- public:
- typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
- typedef typename PlainObjectType::Base Base;
- typedef typename Eigen::internal::nested<Self>::type Nested;
- typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
- typedef typename internal::traits<PlainObjectType>::Index Index;
- typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename Base::CoeffReturnType CoeffReturnType;
-
- /* typedef typename internal::conditional<
- bool(internal::is_lvalue<PlainObjectType>::value),
- Scalar *,
- const Scalar *>::type
- PointerType;*/
- typedef typename MakePointer_<Scalar>::Type PointerType;
- typedef PointerType PointerArgType;
-
- static const int Options = Options_;
-
- static const Index NumIndices = PlainObjectType::NumIndices;
- typedef typename PlainObjectType::Dimensions Dimensions;
-
- enum {
- IsAligned = ((int(Options_)&Aligned)==Aligned),
- Layout = PlainObjectType::Layout,
- CoordAccess = true,
- RawAccess = true
- };
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() {
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) {
- // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
- EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) {
- EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) {
- EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) {
- EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) {
- EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE)
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array<Index, NumIndices>& dimensions)
- : m_data(dataPtr), m_dimensions(dimensions)
- { }
-
- template <typename Dimensions>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions)
- : m_data(dataPtr), m_dimensions(dimensions)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor)
- : m_data(tensor.data()), m_dimensions(tensor.dimensions())
- { }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE PointerType data() { return m_data; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const PointerType data() const { return m_data; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
- {
- // eigen_assert(checkIndexRange(indices));
- if (PlainObjectType::Options&RowMajor) {
- const Index index = m_dimensions.IndexOfRowMajor(indices);
- return m_data[index];
- } else {
- const Index index = m_dimensions.IndexOfColMajor(indices);
- return m_data[index];
- }
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()() const
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return m_data[0];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_data[index];
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
- {
- EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
- if (PlainObjectType::Options&RowMajor) {
- const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- return m_data[index];
- } else {
- const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
- return m_data[index];
- }
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i1 + i0 * m_dimensions[1];
- return m_data[index];
- } else {
- const Index index = i0 + i1 * m_dimensions[0];
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
- return m_data[index];
- }
- }
-#endif
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, NumIndices>& indices)
- {
- // eigen_assert(checkIndexRange(indices));
- if (PlainObjectType::Options&RowMajor) {
- const Index index = m_dimensions.IndexOfRowMajor(indices);
- return m_data[index];
- } else {
- const Index index = m_dimensions.IndexOfColMajor(indices);
- return m_data[index];
- }
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()()
- {
- EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE)
- return m_data[0];
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index index)
- {
- eigen_internal_assert(index >= 0 && index < size());
- return m_data[index];
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
- {
- static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
- const std::size_t NumDims = sizeof...(otherIndices) + 2;
- if (PlainObjectType::Options&RowMajor) {
- const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}});
- return m_data[index];
- } else {
- const Index index = m_dimensions.IndexOfColMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}});
- return m_data[index];
- }
- }
-#else
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i1 + i0 * m_dimensions[1];
- return m_data[index];
- } else {
- const Index index = i0 + i1 * m_dimensions[0];
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
- return m_data[index];
- }
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
- {
- if (PlainObjectType::Options&RowMajor) {
- const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
- return m_data[index];
- } else {
- const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
- return m_data[index];
- }
- }
-#endif
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other)
- {
- typedef TensorAssignOp<Self, const Self> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Self& operator=(const OtherDerived& other)
- {
- typedef TensorAssignOp<Self, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- private:
- typename MakePointer_<Scalar>::Type m_data;
- Dimensions m_dimensions;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
deleted file mode 100644
index 615559d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ /dev/null
@@ -1,218 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
-#define EIGEN_CXX11_TENSOR_TENSOR_META_H
-
-namespace Eigen {
-
-template<bool cond> struct Cond {};
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T1& choose(Cond<true>, const T1& first, const T2&) {
- return first;
-}
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T2& choose(Cond<false>, const T1&, const T2& second) {
- return second;
-}
-
-
-template <typename T, typename X, typename Y>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T divup(const X x, const Y y) {
- return static_cast<T>((x + y - 1) / y);
-}
-
-template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T divup(const T x, const T y) {
- return static_cast<T>((x + y - 1) / y);
-}
-
-template <size_t n> struct max_n_1 {
- static const size_t size = n;
-};
-template <> struct max_n_1<0> {
- static const size_t size = 1;
-};
-
-
-// Default packet types
-template <typename Scalar, typename Device>
-struct PacketType : internal::packet_traits<Scalar> {
- typedef typename internal::packet_traits<Scalar>::type type;
-};
-
-// For CUDA packet types when using a GpuDevice
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16)
-template <>
-struct PacketType<half, GpuDevice> {
- typedef half2 type;
- static const int size = 2;
- enum {
- HasAdd = 1,
- HasSub = 1,
- HasMul = 1,
- HasNegate = 1,
- HasAbs = 1,
- HasArg = 0,
- HasAbs2 = 0,
- HasMin = 1,
- HasMax = 1,
- HasConj = 0,
- HasSetLinear = 0,
- HasBlend = 0,
-
- HasDiv = 1,
- HasSqrt = 1,
- HasRsqrt = 1,
- HasExp = 1,
- HasLog = 1,
- HasLog1p = 0,
- HasLog10 = 0,
- HasPow = 1,
- };
-};
-#endif
-
-#if defined(EIGEN_USE_SYCL)
-template <typename T>
- struct PacketType<T, SyclDevice> {
- typedef T type;
- static const int size = 1;
- enum {
- HasAdd = 0,
- HasSub = 0,
- HasMul = 0,
- HasNegate = 0,
- HasAbs = 0,
- HasArg = 0,
- HasAbs2 = 0,
- HasMin = 0,
- HasMax = 0,
- HasConj = 0,
- HasSetLinear = 0,
- HasBlend = 0
- };
-};
-#endif
-
-
-// Tuple mimics std::pair but works on e.g. nvcc.
-template <typename U, typename V> struct Tuple {
- public:
- U first;
- V second;
-
- typedef U first_type;
- typedef V second_type;
-
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Tuple() : first(), second() {}
-
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Tuple(const U& f, const V& s) : first(f), second(s) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Tuple& operator= (const Tuple& rhs) {
- if (&rhs == this) return *this;
- first = rhs.first;
- second = rhs.second;
- return *this;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void swap(Tuple& rhs) {
- using numext::swap;
- swap(first, rhs.first);
- swap(second, rhs.second);
- }
-};
-
-template <typename U, typename V>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-bool operator==(const Tuple<U, V>& x, const Tuple<U, V>& y) {
- return (x.first == y.first && x.second == y.second);
-}
-
-template <typename U, typename V>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-bool operator!=(const Tuple<U, V>& x, const Tuple<U, V>& y) {
- return !(x == y);
-}
-
-
-// Can't use std::pairs on cuda devices
-template <typename Idx> struct IndexPair {
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {}
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {}
-
- EIGEN_DEVICE_FUNC void set(IndexPair<Idx> val) {
- first = val.first;
- second = val.second;
- }
-
- Idx first;
- Idx second;
-};
-
-
-#ifdef EIGEN_HAS_SFINAE
-namespace internal {
-
- template<typename IndexType, Index... Is>
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) {
- return { idx[Is]... };
- }
- template<typename IndexType>
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) {
- return array<Index, 0>();
- }
-
- /** Make an array (for index/dimensions) out of a custom index */
- template<typename Index, std::size_t NumIndices, typename IndexType>
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- array<Index, NumIndices> customIndices2Array(IndexType& idx) {
- return customIndices2Array(idx, typename gen_numeric_list<Index, NumIndices>::type{});
- }
-
-
- template <typename B, typename D>
- struct is_base_of
- {
-
- typedef char (&yes)[1];
- typedef char (&no)[2];
-
- template <typename BB, typename DD>
- struct Host
- {
- operator BB*() const;
- operator DD*();
- };
-
- template<typename T>
- static yes check(D*, T);
- static no check(B*, int);
-
- static const bool value = sizeof(check(Host<B,D>(), int())) == sizeof(yes);
- };
-
-}
-#endif
-
-
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
deleted file mode 100644
index d34f1e3..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ /dev/null
@@ -1,888 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
-
-namespace Eigen {
-
-/** \class TensorReshaping
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor reshaping class.
- *
- *
- */
-namespace internal {
-template<typename NewDimensions, typename XprType>
-struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = array_size<NewDimensions>::value;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename NewDimensions, typename XprType>
-struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
-{
- typedef const TensorReshapingOp<NewDimensions, XprType>& type;
-};
-
-template<typename NewDimensions, typename XprType>
-struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
-{
- typedef TensorReshapingOp<NewDimensions, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename NewDimensions, typename XprType>
-class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
- : m_xpr(expr), m_dims(dims) {}
-
- EIGEN_DEVICE_FUNC
- const NewDimensions& dimensions() const { return m_dims; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
- {
- typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const NewDimensions m_dims;
-};
-
-
-// Eval as rvalue
-template<typename NewDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
-{
- typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
- typedef NewDimensions Dimensions;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_dimensions(op.dimensions())
- {
- // The total size of the reshaped tensor must be equal to the total size
- // of the input tensor.
- eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- return m_impl.evalSubExprsIfNeeded(data);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(index);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- return m_impl.template packet<LoadMode>(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return m_impl.costPerCoeff(vectorized);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
-
- EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- protected:
- TensorEvaluator<ArgType, Device> m_impl;
- NewDimensions m_dimensions;
-};
-
-
-// Eval as lvalue
-template<typename NewDimensions, typename ArgType, typename Device>
- struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
- : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
-
-{
- typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
- typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
- typedef NewDimensions Dimensions;
-
- enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(index);
- }
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- this->m_impl.template writePacket<StoreMode>(index, x);
- }
-};
-
-
-/** \class TensorSlicing
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor slicing class.
- *
- *
- */
-namespace internal {
-template<typename StartIndices, typename Sizes, typename XprType>
-struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = array_size<StartIndices>::value;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename StartIndices, typename Sizes, typename XprType>
-struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
-{
- typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
-};
-
-template<typename StartIndices, typename Sizes, typename XprType>
-struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
-{
- typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename StartIndices, typename Sizes, typename XprType>
-class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
-{
- public:
- typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
- : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
-
- EIGEN_DEVICE_FUNC
- const StartIndices& startIndices() const { return m_indices; }
- EIGEN_DEVICE_FUNC
- const Sizes& sizes() const { return m_sizes; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
- {
- typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
-
- protected:
- typename XprType::Nested m_xpr;
- const StartIndices m_indices;
- const Sizes m_sizes;
-};
-
-
-// Fixme: figure out the exact threshold
-namespace {
-template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
- EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
- EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }
-
- private:
- Index threshold_;
-};
-
-// It is very expensive to start the memcpy kernel on GPU: we therefore only
-// use it for large copies.
-#ifdef EIGEN_USE_GPU
-template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
- EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
- EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
-};
-#endif
-}
-
-// Eval as rvalue
-template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
-{
- typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
- static const int NumDims = internal::array_size<Sizes>::value;
-
- enum {
- // Alignment can't be guaranteed at compile time since it depends on the
- // slice offsets and sizes.
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
- {
- for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
- eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
- }
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- const Sizes& output_dims = op.sizes();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- }
-
- // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
- } else {
- m_inputStrides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- }
-
- // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
- m_outputStrides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
- }
- }
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef Sizes Dimensions;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
- m_impl.evalSubExprsIfNeeded(NULL);
- if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
- Index contiguous_values = 1;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumDims; ++i) {
- contiguous_values *= dimensions()[i];
- if (dimensions()[i] != m_impl.dimensions()[i]) {
- break;
- }
- }
- } else {
- for (int i = NumDims-1; i >= 0; --i) {
- contiguous_values *= dimensions()[i];
- if (dimensions()[i] != m_impl.dimensions()[i]) {
- break;
- }
- }
- }
- // Use memcpy if it's going to be faster than using the regular evaluation.
- const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
- if (trigger(contiguous_values)) {
- Scalar* src = (Scalar*)m_impl.data();
- for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
- Index offset = srcCoeff(i);
- m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
- }
- return false;
- }
- }
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(srcCoeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
- EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
-
- Index inputIndices[] = {0, 0};
- Index indices[] = {index, index + packetSize - 1};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_fastOutputStrides[i];
- const Index idx1 = indices[1] / m_fastOutputStrides[i];
- inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
- inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
- indices[0] -= idx0 * m_outputStrides[i];
- indices[1] -= idx1 * m_outputStrides[i];
- }
- inputIndices[0] += (indices[0] + m_offsets[0]);
- inputIndices[1] += (indices[1] + m_offsets[0]);
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx0 = indices[0] / m_fastOutputStrides[i];
- const Index idx1 = indices[1] / m_fastOutputStrides[i];
- inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
- inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
- indices[0] -= idx0 * m_outputStrides[i];
- indices[1] -= idx1 * m_outputStrides[i];
- }
- inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
- inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
- }
- if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
- PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
- return rslt;
- }
- else {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
- values[0] = m_impl.coeff(inputIndices[0]);
- values[packetSize-1] = m_impl.coeff(inputIndices[1]);
- for (int i = 1; i < packetSize-1; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
- }
-
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
- Scalar* result = m_impl.data();
- if (result) {
- Index offset = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumDims; ++i) {
- if (m_dimensions[i] != m_impl.dimensions()[i]) {
- offset += m_offsets[i] * m_inputStrides[i];
- for (int j = i+1; j < NumDims; ++j) {
- if (m_dimensions[j] > 1) {
- return NULL;
- }
- offset += m_offsets[j] * m_inputStrides[j];
- }
- break;
- }
- }
- } else {
- for (int i = NumDims - 1; i >= 0; --i) {
- if (m_dimensions[i] != m_impl.dimensions()[i]) {
- offset += m_offsets[i] * m_inputStrides[i];
- for (int j = i-1; j >= 0; --j) {
- if (m_dimensions[j] > 1) {
- return NULL;
- }
- offset += m_offsets[j] * m_inputStrides[j];
- }
- break;
- }
- }
- }
- return result + offset;
- }
- return NULL;
- }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
- {
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_fastOutputStrides[i];
- inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- inputIndex += (index + m_offsets[0]);
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_fastOutputStrides[i];
- inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- inputIndex += (index + m_offsets[NumDims-1]);
- }
- return inputIndex;
- }
-
- array<Index, NumDims> m_outputStrides;
- array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
- Dimensions m_dimensions;
- const StartIndices m_offsets;
-};
-
-
-// Eval as lvalue
-template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
-struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
- : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
-{
- typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
- typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
- static const int NumDims = internal::array_size<Sizes>::value;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef Sizes Dimensions;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(this->srcCoeff(index));
- }
-
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
- Index inputIndices[] = {0, 0};
- Index indices[] = {index, index + packetSize - 1};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
- const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
- inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
- inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
- indices[0] -= idx0 * this->m_outputStrides[i];
- indices[1] -= idx1 * this->m_outputStrides[i];
- }
- inputIndices[0] += (indices[0] + this->m_offsets[0]);
- inputIndices[1] += (indices[1] + this->m_offsets[0]);
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
- const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
- inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
- inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
- indices[0] -= idx0 * this->m_outputStrides[i];
- indices[1] -= idx1 * this->m_outputStrides[i];
- }
- inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
- inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
- }
- if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
- this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
- }
- else {
- EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- this->m_impl.coeffRef(inputIndices[0]) = values[0];
- this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
- for (int i = 1; i < packetSize-1; ++i) {
- this->coeffRef(index+i) = values[i];
- }
- }
- }
-};
-
-
-
-namespace internal {
-template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
-struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = array_size<StartIndices>::value;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
-struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
-{
- typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
-};
-
-template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
-struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
-{
- typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
-};
-
-} // end namespace internal
-
-
-template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
-class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
-{
- public:
- typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
- typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
- typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
- const XprType& expr, const StartIndices& startIndices,
- const StopIndices& stopIndices, const Strides& strides)
- : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
- m_strides(strides) {}
-
- EIGEN_DEVICE_FUNC
- const StartIndices& startIndices() const { return m_startIndices; }
- EIGEN_DEVICE_FUNC
- const StartIndices& stopIndices() const { return m_stopIndices; }
- EIGEN_DEVICE_FUNC
- const StartIndices& strides() const { return m_strides; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
- {
- typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(
- assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(
- assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const StartIndices m_startIndices;
- const StopIndices m_stopIndices;
- const Strides m_strides;
-};
-
-// Eval as rvalue
-template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
-{
- typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
- static const int NumDims = internal::array_size<Strides>::value;
-
- enum {
- // Alignment can't be guaranteed at compile time since it depends on the
- // slice offsets and sizes.
- IsAligned = false,
- PacketAccess = false,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
- {
- // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
- DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
- for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
- eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
- if(m_strides[i]>0){
- startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
- stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
- }else{
- /* implies m_strides[i]<0 by assert */
- startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
- stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
- }
- m_startIndices[i] = startIndicesClamped[i];
- }
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-
- // check for degenerate intervals and compute output tensor shape
- bool degenerate = false;;
- for(int i = 0; i < NumDims; i++){
- Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
- if(interval == 0 || ((interval<0) != (m_strides[i]<0))){
- m_dimensions[i] = 0;
- degenerate = true;
- }else{
- m_dimensions[i] = interval / m_strides[i]
- + (interval % m_strides[i] != 0 ? 1 : 0);
- eigen_assert(m_dimensions[i] >= 0);
- }
- }
- Strides output_dims = m_dimensions;
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStrides[0] = m_strides[0];
- m_offsets[0] = startIndicesClamped[0];
- Index previousDimProduct = 1;
- for (int i = 1; i < NumDims; ++i) {
- previousDimProduct *= input_dims[i-1];
- m_inputStrides[i] = previousDimProduct * m_strides[i];
- m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
- }
-
- // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
- // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
- }
- } else {
- m_inputStrides[NumDims-1] = m_strides[NumDims-1];
- m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
- Index previousDimProduct = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- previousDimProduct *= input_dims[i+1];
- m_inputStrides[i] = previousDimProduct * m_strides[i];
- m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
- }
-
- m_outputStrides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
- // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
- m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
- }
- }
- m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
- device.lastLevelCacheSize() /
- sizeof(Scalar));
- }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef Strides Dimensions;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(srcCoeff(index));
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
- return NULL;
- }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
- {
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i >= 0; --i) {
- const Index idx = index / m_fastOutputStrides[i];
- inputIndex += idx * m_inputStrides[i] + m_offsets[i];
- index -= idx * m_outputStrides[i];
- }
- } else {
- for (int i = 0; i < NumDims; ++i) {
- const Index idx = index / m_fastOutputStrides[i];
- inputIndex += idx * m_inputStrides[i] + m_offsets[i];
- index -= idx * m_outputStrides[i];
- }
- }
- return inputIndex;
- }
-
- static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
- return numext::maxi(min, numext::mini(max,value));
- }
-
- array<Index, NumDims> m_outputStrides;
- array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
- const Device& m_device;
- DSizes<Index, NumDims> m_startIndices; // clamped startIndices
- DSizes<Index, NumDims> m_dimensions;
- DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
- const Strides m_strides;
- std::size_t m_block_total_size_max;
-};
-
-// Eval as lvalue
-template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
- : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
-{
- typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
- typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
- static const int NumDims = internal::array_size<Strides>::value;
-
- enum {
- IsAligned = false,
- PacketAccess = false,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef Strides Dimensions;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(this->srcCoeff(index));
- }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
deleted file mode 100644
index 647bcf1..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ /dev/null
@@ -1,397 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
-
-namespace Eigen {
-
-/** \class TensorPadding
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor padding class.
- * At the moment only padding with a constant value is supported.
- *
- */
-namespace internal {
-template<typename PaddingDimensions, typename XprType>
-struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename PaddingDimensions, typename XprType>
-struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
-{
- typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
-};
-
-template<typename PaddingDimensions, typename XprType>
-struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
-{
- typedef TensorPaddingOp<PaddingDimensions, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename PaddingDimensions, typename XprType>
-class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
- : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
-
- EIGEN_DEVICE_FUNC
- const PaddingDimensions& padding() const { return m_padding_dims; }
- EIGEN_DEVICE_FUNC
- Scalar padding_value() const { return m_padding_value; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const PaddingDimensions m_padding_dims;
- const Scalar m_padding_value;
-};
-
-
-// Eval as rvalue
-template<typename PaddingDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
-{
- typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<PaddingDimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = true,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = true,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value())
- {
- // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
- // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
- // of 1 element first and then pad.
- EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- // Compute dimensions
- m_dimensions = m_impl.dimensions();
- for (int i = 0; i < NumDims; ++i) {
- m_dimensions[i] += m_padding[i].first + m_padding[i].second;
- }
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStrides[0] = 1;
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- }
- m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
- } else {
- m_inputStrides[NumDims - 1] = 1;
- m_outputStrides[NumDims] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
- }
- m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- eigen_assert(index < dimensions().TotalSize());
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
- if (isPaddingAtIndexForDim(idx, i)) {
- return m_paddingValue;
- }
- inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- if (isPaddingAtIndexForDim(index, 0)) {
- return m_paddingValue;
- }
- inputIndex += (index - m_padding[0].first);
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStrides[i+1];
- if (isPaddingAtIndexForDim(idx, i)) {
- return m_paddingValue;
- }
- inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
- index -= idx * m_outputStrides[i+1];
- }
- if (isPaddingAtIndexForDim(index, NumDims-1)) {
- return m_paddingValue;
- }
- inputIndex += (index - m_padding[NumDims-1].first);
- }
- return m_impl.coeff(inputIndex);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return packetColMajor(index);
- }
- return packetRowMajor(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- TensorOpCost cost = m_impl.costPerCoeff(vectorized);
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumDims; ++i)
- updateCostPerDimension(cost, i, i == 0);
- } else {
- for (int i = NumDims - 1; i >= 0; --i)
- updateCostPerDimension(cost, i, i == NumDims - 1);
- }
- return cost;
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- private:
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
- Index index, int dim_index) const {
-#if defined(EIGEN_HAS_INDEX_LIST)
- return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
- index < m_padding[dim_index].first) ||
- (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
- index >= m_dimensions[dim_index] - m_padding[dim_index].second);
-#else
- return (index < m_padding[dim_index].first) ||
- (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
-#endif
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
- int dim_index) const {
-#if defined(EIGEN_HAS_INDEX_LIST)
- return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
-#else
- EIGEN_UNUSED_VARIABLE(dim_index);
- return false;
-#endif
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
- int dim_index) const {
-#if defined(EIGEN_HAS_INDEX_LIST)
- return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
-#else
- EIGEN_UNUSED_VARIABLE(dim_index);
- return false;
-#endif
- }
-
-
- void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
- const double in = static_cast<double>(m_impl.dimensions()[i]);
- const double out = in + m_padding[i].first + m_padding[i].second;
- if (out == 0)
- return;
- const double reduction = in / out;
- cost *= reduction;
- if (first) {
- cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
- reduction * (1 * TensorOpCost::AddCost<Index>()));
- } else {
- cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
- 2 * TensorOpCost::MulCost<Index>() +
- reduction * (2 * TensorOpCost::MulCost<Index>() +
- 1 * TensorOpCost::DivCost<Index>()));
- }
- }
-
- protected:
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- const Index initialIndex = index;
- Index inputIndex = 0;
- for (int i = NumDims - 1; i > 0; --i) {
- const Index first = index;
- const Index last = index + PacketSize - 1;
- const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
- const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
- const Index lastPaddedRight = m_outputStrides[i+1];
-
- if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
- // all the coefficient are between the 2 padding zones.
- const Index idx = index / m_outputStrides[i];
- inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- else {
- // Every other case
- return packetWithPossibleZero(initialIndex);
- }
- }
-
- const Index last = index + PacketSize - 1;
- const Index first = index;
- const Index lastPaddedLeft = m_padding[0].first;
- const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
- const Index lastPaddedRight = m_outputStrides[1];
-
- if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
- // all the coefficient are between the 2 padding zones.
- inputIndex += (index - m_padding[0].first);
- return m_impl.template packet<Unaligned>(inputIndex);
- }
- // Every other case
- return packetWithPossibleZero(initialIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- const Index initialIndex = index;
- Index inputIndex = 0;
-
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index first = index;
- const Index last = index + PacketSize - 1;
- const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
- const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
- const Index lastPaddedRight = m_outputStrides[i];
-
- if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
- // all the coefficient are between the 2 padding zones.
- const Index idx = index / m_outputStrides[i+1];
- inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
- index -= idx * m_outputStrides[i+1];
- }
- else {
- // Every other case
- return packetWithPossibleZero(initialIndex);
- }
- }
-
- const Index last = index + PacketSize - 1;
- const Index first = index;
- const Index lastPaddedLeft = m_padding[NumDims-1].first;
- const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
- const Index lastPaddedRight = m_outputStrides[NumDims-1];
-
- if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) {
- // all the coefficient are in the padding zone.
- return internal::pset1<PacketReturnType>(m_paddingValue);
- }
- else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) {
- // all the coefficient are between the 2 padding zones.
- inputIndex += (index - m_padding[NumDims-1].first);
- return m_impl.template packet<Unaligned>(inputIndex);
- }
- // Every other case
- return packetWithPossibleZero(initialIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
- {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- Dimensions m_dimensions;
- array<Index, NumDims+1> m_outputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
- PaddingDimensions m_padding;
-
- Scalar m_paddingValue;
-};
-
-
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
deleted file mode 100644
index 886a254..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h
+++ /dev/null
@@ -1,269 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorPatch
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor patch class.
- *
- *
- */
-namespace internal {
-template<typename PatchDim, typename XprType>
-struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions + 1;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename PatchDim, typename XprType>
-struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense>
-{
- typedef const TensorPatchOp<PatchDim, XprType>& type;
-};
-
-template<typename PatchDim, typename XprType>
-struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type>
-{
- typedef TensorPatchOp<PatchDim, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename PatchDim, typename XprType>
-class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims)
- : m_xpr(expr), m_patch_dims(patch_dims) {}
-
- EIGEN_DEVICE_FUNC
- const PatchDim& patch_dims() const { return m_patch_dims; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const PatchDim m_patch_dims;
-};
-
-
-// Eval as rvalue
-template<typename PatchDim, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
-{
- typedef TensorPatchOp<PatchDim, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
-
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- Index num_patches = 1;
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- const PatchDim& patch_dims = op.patch_dims();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumDims-1; ++i) {
- m_dimensions[i] = patch_dims[i];
- num_patches *= (input_dims[i] - patch_dims[i] + 1);
- }
- m_dimensions[NumDims-1] = num_patches;
-
- m_inputStrides[0] = 1;
- m_patchStrides[0] = 1;
- for (int i = 1; i < NumDims-1; ++i) {
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1);
- }
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- }
- } else {
- for (int i = 0; i < NumDims-1; ++i) {
- m_dimensions[i+1] = patch_dims[i];
- num_patches *= (input_dims[i] - patch_dims[i] + 1);
- }
- m_dimensions[0] = num_patches;
-
- m_inputStrides[NumDims-2] = 1;
- m_patchStrides[NumDims-2] = 1;
- for (int i = NumDims-3; i >= 0; --i) {
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1);
- }
- m_outputStrides[NumDims-1] = 1;
- for (int i = NumDims-2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
- // Find the location of the first element of the patch.
- Index patchIndex = index / m_outputStrides[output_stride_index];
- // Find the offset of the element wrt the location of the first element.
- Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index];
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 2; i > 0; --i) {
- const Index patchIdx = patchIndex / m_patchStrides[i];
- patchIndex -= patchIdx * m_patchStrides[i];
- const Index offsetIdx = patchOffset / m_outputStrides[i];
- patchOffset -= offsetIdx * m_outputStrides[i];
- inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
- }
- } else {
- for (int i = 0; i < NumDims - 2; ++i) {
- const Index patchIdx = patchIndex / m_patchStrides[i];
- patchIndex -= patchIdx * m_patchStrides[i];
- const Index offsetIdx = patchOffset / m_outputStrides[i+1];
- patchOffset -= offsetIdx * m_outputStrides[i+1];
- inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
- }
- }
- inputIndex += (patchIndex + patchOffset);
- return m_impl.coeff(inputIndex);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
- Index indices[2] = {index, index + PacketSize - 1};
- Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index],
- indices[1] / m_outputStrides[output_stride_index]};
- Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index],
- indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]};
-
- Index inputIndices[2] = {0, 0};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 2; i > 0; --i) {
- const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
- patchIndices[1] / m_patchStrides[i]};
- patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
- patchIndices[1] -= patchIdx[1] * m_patchStrides[i];
-
- const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i],
- patchOffsets[1] / m_outputStrides[i]};
- patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i];
- patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i];
-
- inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
- inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
- }
- } else {
- for (int i = 0; i < NumDims - 2; ++i) {
- const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
- patchIndices[1] / m_patchStrides[i]};
- patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
- patchIndices[1] -= patchIdx[1] * m_patchStrides[i];
-
- const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1],
- patchOffsets[1] / m_outputStrides[i+1]};
- patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1];
- patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1];
-
- inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
- inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
- }
- }
- inputIndices[0] += (patchIndices[0] + patchOffsets[0]);
- inputIndices[1] += (patchIndices[1] + patchOffsets[1]);
-
- if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
- PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
- return rslt;
- }
- else {
- EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
- values[0] = m_impl.coeff(inputIndices[0]);
- values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
- for (int i = 1; i < PacketSize-1; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- const double compute_cost = NumDims * (TensorOpCost::DivCost<Index>() +
- TensorOpCost::MulCost<Index>() +
- 2 * TensorOpCost::AddCost<Index>());
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims-1> m_inputStrides;
- array<Index, NumDims-1> m_patchStrides;
-
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
deleted file mode 100644
index 1655a81..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h
+++ /dev/null
@@ -1,276 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
-#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
-
-namespace Eigen {
-namespace internal {
-
-namespace {
-
-EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
-#ifdef __CUDA_ARCH__
- // We don't support 3d kernels since we currently only use 1 and
- // 2d kernels.
- assert(threadIdx.z == 0);
- return clock64() +
- blockIdx.x * blockDim.x + threadIdx.x +
- gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
-
-#elif defined _WIN32
- // Use the current time as a baseline.
- SYSTEMTIME st;
- GetSystemTime(&st);
- int time = st.wSecond + 1000 * st.wMilliseconds;
- // Mix in a random number to make sure that we get different seeds if
- // we try to generate seeds faster than the clock resolution.
- // We need 2 random values since the generator only generate 16 bits at
- // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
- int rnd1 = ::rand();
- int rnd2 = ::rand();
- uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
- return rnd;
-
-#elif defined __APPLE__
- // Same approach as for win32, except that the random number generator
- // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
- uint64_t rnd = ::random() ^ mach_absolute_time();
- return rnd;
-
-#else
- // Augment the current time with pseudo random number generation
- // to ensure that we get different seeds if we try to generate seeds
- // faster than the clock resolution.
- timespec ts;
- clock_gettime(CLOCK_REALTIME, &ts);
- uint64_t rnd = ::random() ^ ts.tv_nsec;
- return rnd;
-#endif
-}
-
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
- // TODO: Unify with the implementation in the non blocking thread pool.
- uint64_t current = *state;
- // Update the internal state
- *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
- // Generate the random output (using the PCG-XSH-RS scheme)
- return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
-}
-
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
- seed = seed ? seed : get_random_seed();
- return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
-}
-
-} // namespace
-
-
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-T RandomToTypeUniform(uint64_t* state) {
- unsigned rnd = PCG_XSH_RS_generator(state);
- return static_cast<T>(rnd);
-}
-
-
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) {
- Eigen::half result;
- // Generate 10 random bits for the mantissa
- unsigned rnd = PCG_XSH_RS_generator(state);
- result.x = static_cast<uint16_t>(rnd & 0x3ffu);
- // Set the exponent
- result.x |= (static_cast<uint16_t>(15) << 10);
- // Return the final result
- return result - Eigen::half(1.0f);
-}
-
-
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float RandomToTypeUniform<float>(uint64_t* state) {
- typedef union {
- uint32_t raw;
- float fp;
- } internal;
- internal result;
- // Generate 23 random bits for the mantissa mantissa
- const unsigned rnd = PCG_XSH_RS_generator(state);
- result.raw = rnd & 0x7fffffu;
- // Set the exponent
- result.raw |= (static_cast<uint32_t>(127) << 23);
- // Return the final result
- return result.fp - 1.0f;
-}
-
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double RandomToTypeUniform<double>(uint64_t* state) {
- typedef union {
- uint64_t raw;
- double dp;
- } internal;
- internal result;
- result.raw = 0;
- // Generate 52 random bits for the mantissa
- // First generate the upper 20 bits
- unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu;
- // The generate the lower 32 bits
- unsigned rnd2 = PCG_XSH_RS_generator(state);
- result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2;
- // Set the exponent
- result.raw |= (static_cast<uint64_t>(1023) << 52);
- // Return the final result
- return result.dp - 1.0;
-}
-
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) {
- return std::complex<float>(RandomToTypeUniform<float>(state),
- RandomToTypeUniform<float>(state));
-}
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) {
- return std::complex<double>(RandomToTypeUniform<double>(state),
- RandomToTypeUniform<double>(state));
-}
-
-template <typename T> class UniformRandomGenerator {
- public:
- static const bool PacketAccess = true;
-
- // Uses the given "seed" if non-zero, otherwise uses a random seed.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
- uint64_t seed = 0) {
- m_state = PCG_XSH_RS_state(seed);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
- const UniformRandomGenerator& other) {
- m_state = other.m_state;
- }
-
- template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T operator()(Index i) const {
- uint64_t local_state = m_state + i;
- T result = RandomToTypeUniform<T>(&local_state);
- m_state = local_state;
- return result;
- }
-
- template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Packet packetOp(Index i) const {
- const int packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_ALIGN_MAX T values[packetSize];
- uint64_t local_state = m_state + i;
- for (int j = 0; j < packetSize; ++j) {
- values[j] = RandomToTypeUniform<T>(&local_state);
- }
- m_state = local_state;
- return internal::pload<Packet>(values);
- }
-
- private:
- mutable uint64_t m_state;
-};
-
-template <typename Scalar>
-struct functor_traits<UniformRandomGenerator<Scalar> > {
- enum {
- // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)).
- Cost = 12 * NumTraits<Scalar>::AddCost *
- ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),
- PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
- };
-};
-
-
-
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-T RandomToTypeNormal(uint64_t* state) {
- // Use the ratio of uniform method to generate numbers following a normal
- // distribution. See for example Numerical Recipes chapter 7.3.9 for the
- // details.
- T u, v, q;
- do {
- u = RandomToTypeUniform<T>(state);
- v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5));
- const T x = u - T(0.449871);
- const T y = numext::abs(v) + T(0.386595);
- q = x*x + y * (T(0.196)*y - T(0.25472)*x);
- } while (q > T(0.27597) &&
- (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u));
-
- return v/u;
-}
-
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) {
- return std::complex<float>(RandomToTypeNormal<float>(state),
- RandomToTypeNormal<float>(state));
-}
-template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) {
- return std::complex<double>(RandomToTypeNormal<double>(state),
- RandomToTypeNormal<double>(state));
-}
-
-
-template <typename T> class NormalRandomGenerator {
- public:
- static const bool PacketAccess = true;
-
- // Uses the given "seed" if non-zero, otherwise uses a random seed.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
- m_state = PCG_XSH_RS_state(seed);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
- const NormalRandomGenerator& other) {
- m_state = other.m_state;
- }
-
- template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T operator()(Index i) const {
- uint64_t local_state = m_state + i;
- T result = RandomToTypeNormal<T>(&local_state);
- m_state = local_state;
- return result;
- }
-
- template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- Packet packetOp(Index i) const {
- const int packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_ALIGN_MAX T values[packetSize];
- uint64_t local_state = m_state + i;
- for (int j = 0; j < packetSize; ++j) {
- values[j] = RandomToTypeNormal<T>(&local_state);
- }
- m_state = local_state;
- return internal::pload<Packet>(values);
- }
-
- private:
- mutable uint64_t m_state;
-};
-
-
-template <typename Scalar>
-struct functor_traits<NormalRandomGenerator<Scalar> > {
- enum {
- // On average, we need to generate about 3 random numbers
- // 15 mul, 8 add, 1.5 logs
- Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost +
- 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost +
- 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,
- PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
- };
-};
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
deleted file mode 100644
index 41d0d00..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ /dev/null
@@ -1,781 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
-
-namespace Eigen {
-
-/** \class TensorReduction
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor reduction class.
- *
- */
-
-namespace internal {
- template<typename Op, typename Dims, typename XprType,template <class> class MakePointer_ >
- struct traits<TensorReductionOp<Op, Dims, XprType, MakePointer_> >
- : traits<XprType>
-{
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::Scalar Scalar;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- static const int NumDimensions = XprTraits::NumDimensions - array_size<Dims>::value;
- static const int Layout = XprTraits::Layout;
-
- template <class T> struct MakePointer {
- // Intermediate typedef to workaround MSVC issue.
- typedef MakePointer_<T> MakePointerT;
- typedef typename MakePointerT::Type Type;
- };
-};
-
-template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_>
-struct eval<TensorReductionOp<Op, Dims, XprType, MakePointer_>, Eigen::Dense>
-{
- typedef const TensorReductionOp<Op, Dims, XprType, MakePointer_>& type;
-};
-
-template<typename Op, typename Dims, typename XprType, template <class> class MakePointer_>
-struct nested<TensorReductionOp<Op, Dims, XprType, MakePointer_>, 1, typename eval<TensorReductionOp<Op, Dims, XprType, MakePointer_> >::type>
-{
- typedef TensorReductionOp<Op, Dims, XprType, MakePointer_> type;
-};
-
-
-template <typename OutputDims> struct DimInitializer {
- template <typename InputDims, typename ReducedDims> EIGEN_DEVICE_FUNC
- static void run(const InputDims& input_dims,
- const array<bool, internal::array_size<InputDims>::value>& reduced,
- OutputDims* output_dims, ReducedDims* reduced_dims) {
- const int NumInputDims = internal::array_size<InputDims>::value;
- int outputIndex = 0;
- int reduceIndex = 0;
- for (int i = 0; i < NumInputDims; ++i) {
- if (reduced[i]) {
- (*reduced_dims)[reduceIndex] = input_dims[i];
- ++reduceIndex;
- } else {
- (*output_dims)[outputIndex] = input_dims[i];
- ++outputIndex;
- }
- }
- }
-};
-
-template <> struct DimInitializer<Sizes<> > {
- template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC
- static void run(const InputDims& input_dims, const array<bool, Rank>&,
- Sizes<>*, array<Index, Rank>* reduced_dims) {
- const int NumInputDims = internal::array_size<InputDims>::value;
- for (int i = 0; i < NumInputDims; ++i) {
- (*reduced_dims)[i] = input_dims[i];
- }
- }
-};
-
-
-template <typename ReducedDims, int NumTensorDims, int Layout>
-struct are_inner_most_dims {
- static const bool value = false;
-};
-template <typename ReducedDims, int NumTensorDims, int Layout>
-struct preserve_inner_most_dims {
- static const bool value = false;
-};
-
-#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES
-template <typename ReducedDims, int NumTensorDims>
-struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
- static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
- static const bool tmp2 = index_statically_eq<ReducedDims>(0, 0);
- static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1);
- static const bool value = tmp1 & tmp2 & tmp3;
-};
-template <typename ReducedDims, int NumTensorDims>
-struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
- static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
- static const bool tmp2 = index_statically_eq<ReducedDims>(0, NumTensorDims - array_size<ReducedDims>::value);
- static const bool tmp3 = index_statically_eq<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
- static const bool value = tmp1 & tmp2 & tmp3;
-
-};
-template <typename ReducedDims, int NumTensorDims>
-struct preserve_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
- static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
- static const bool tmp2 = index_statically_gt<ReducedDims>(0, 0);
- static const bool value = tmp1 & tmp2;
-
-};
-template <typename ReducedDims, int NumTensorDims>
-struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
- static const bool tmp1 = indices_statically_known_to_increase<ReducedDims>();
- static const bool tmp2 = index_statically_lt<ReducedDims>(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
- static const bool value = tmp1 & tmp2;
-};
-#endif
-
-
-template <int DimIndex, typename Self, typename Op>
-struct GenericDimReducer {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
- EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
- for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
- const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
- GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
- }
- }
-};
-template <typename Self, typename Op>
-struct GenericDimReducer<0, Self, Op> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
- for (int j = 0; j < self.m_reducedDims[0]; ++j) {
- const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
- reducer.reduce(self.m_impl.coeff(input), accum);
- }
- }
-};
-template <typename Self, typename Op>
-struct GenericDimReducer<-1, Self, Op> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) {
- reducer.reduce(self.m_impl.coeff(index), accum);
- }
-};
-
-template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct InnerMostDimReducer {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
- typename Self::CoeffReturnType accum = reducer.initialize();
- for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
- reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
- }
- return reducer.finalize(accum);
- }
-};
-
-template <typename Self, typename Op>
-struct InnerMostDimReducer<Self, Op, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
- const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
- const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
- typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>();
- for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
- reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p);
- }
- typename Self::CoeffReturnType accum = reducer.initialize();
- for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
- reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
- }
- return reducer.finalizeBoth(accum, p);
- }
-};
-
-template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct InnerMostDimPreserver {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
- eigen_assert(false && "should never be called");
- }
-};
-
-template <int DimIndex, typename Self, typename Op>
-struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
- EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
- for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
- const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
- InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
- }
- }
-};
-
-template <typename Self, typename Op>
-struct InnerMostDimPreserver<0, Self, Op, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
- for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) {
- const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
- reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum);
- }
- }
-};
-template <typename Self, typename Op>
-struct InnerMostDimPreserver<-1, Self, Op, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
- eigen_assert(false && "should never be called");
- }
-};
-
-// Default full reducer
-template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct FullReducer {
- static const bool HasOptimizedImplementation = false;
-
- static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) {
- const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
- *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
- }
-};
-
-
-#ifdef EIGEN_USE_THREADS
-// Multithreaded full reducers
-template <typename Self, typename Op,
- bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
-struct FullReducerShard {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex,
- typename Self::Index numValuesToReduce, Op& reducer,
- typename Self::CoeffReturnType* output) {
- *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(
- self, firstIndex, numValuesToReduce, reducer);
- }
-};
-
-// Multithreaded full reducer
-template <typename Self, typename Op, bool Vectorizable>
-struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
- static const bool HasOptimizedImplementation = !Op::IsStateful;
- static const int PacketSize =
- unpacket_traits<typename Self::PacketReturnType>::size;
-
- // launch one reducer per thread and accumulate the result.
- static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device,
- typename Self::CoeffReturnType* output) {
- typedef typename Self::Index Index;
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
- if (num_coeffs == 0) {
- *output = reducer.finalize(reducer.initialize());
- return;
- }
- const TensorOpCost cost =
- self.m_impl.costPerCoeff(Vectorizable) +
- TensorOpCost(0, 0, internal::functor_traits<Op>::Cost, Vectorizable,
- PacketSize);
- const int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
- num_coeffs, cost, device.numThreads());
- if (num_threads == 1) {
- *output =
- InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
- return;
- }
- const Index blocksize =
- std::floor<Index>(static_cast<float>(num_coeffs) / num_threads);
- const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
- eigen_assert(num_coeffs >= numblocks * blocksize);
-
- Barrier barrier(internal::convert_index<unsigned int>(numblocks));
- MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
- for (Index i = 0; i < numblocks; ++i) {
- device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run,
- self, i * blocksize, blocksize, reducer,
- &shards[i]);
- }
- typename Self::CoeffReturnType finalShard;
- if (numblocks * blocksize < num_coeffs) {
- finalShard = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(
- self, numblocks * blocksize, num_coeffs - numblocks * blocksize,
- reducer);
- } else {
- finalShard = reducer.initialize();
- }
- barrier.Wait();
-
- for (Index i = 0; i < numblocks; ++i) {
- reducer.reduce(shards[i], &finalShard);
- }
- *output = reducer.finalize(finalShard);
- }
-};
-
-#endif
-
-
-// Default inner reducer
-template <typename Self, typename Op, typename Device>
-struct InnerReducer {
- static const bool HasOptimizedImplementation = false;
-
- EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
- eigen_assert(false && "Not implemented");
- return true;
- }
-};
-
-// Default outer reducer
-template <typename Self, typename Op, typename Device>
-struct OuterReducer {
- static const bool HasOptimizedImplementation = false;
-
- EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
- eigen_assert(false && "Not implemented");
- return true;
- }
-};
-
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-template <int B, int N, typename S, typename R, typename I>
-__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
-
-
-#ifdef EIGEN_HAS_CUDA_FP16
-template <typename S, typename R, typename I>
-__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*);
-template <int B, int N, typename S, typename R, typename I>
-__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*);
-template <int NPT, typename S, typename R, typename I>
-__global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*);
-
-#endif
-
-template <int NPT, typename S, typename R, typename I>
-__global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
-
-template <int NPT, typename S, typename R, typename I>
-__global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
-#endif
-
-} // end namespace internal
-
-
-template <typename Op, typename Dims, typename XprType, template <class> class MakePointer_>
-class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType, MakePointer_>, ReadOnlyAccessors> {
- public:
- typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims)
- { }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const XprType& expression() const { return m_expr; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Dims& dims() const { return m_dims; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Op& reducer() const { return m_reducer; }
-
- protected:
- typename XprType::Nested m_expr;
- const Dims m_dims;
- const Op m_reducer;
-};
-
-
-// Eval as rvalue
-template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
-struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
-{
- typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType;
- typedef typename XprType::Index Index;
- typedef ArgType ChildType;
- typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
- static const int NumInputDims = internal::array_size<InputDimensions>::value;
- static const int NumReducedDims = internal::array_size<Dims>::value;
- static const int NumOutputDims = NumInputDims - NumReducedDims;
- typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
- static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
- typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
- static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
- static const bool RunningFullReduction = (NumOutputDims==0);
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims())
- {
- EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
- YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- // Build the bitmap indicating if an input dimension is reduced or not.
- for (int i = 0; i < NumInputDims; ++i) {
- m_reduced[i] = false;
- }
- for (int i = 0; i < NumReducedDims; ++i) {
- eigen_assert(op.dims()[i] >= 0);
- eigen_assert(op.dims()[i] < NumInputDims);
- m_reduced[op.dims()[i]] = true;
- }
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- internal::DimInitializer<Dimensions>::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims);
-
- // Precompute output strides.
- if (NumOutputDims > 0) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumOutputDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
- }
- } else {
- m_outputStrides.back() = 1;
- for (int i = NumOutputDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
- }
- }
- }
-
- // Precompute input strides.
- if (NumInputDims > 0) {
- array<Index, NumInputDims> input_strides;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- input_strides[0] = 1;
- for (int i = 1; i < NumInputDims; ++i) {
- input_strides[i] = input_strides[i-1] * input_dims[i-1];
- }
- } else {
- input_strides.back() = 1;
- for (int i = NumInputDims - 2; i >= 0; --i) {
- input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
- }
- }
-
- int outputIndex = 0;
- int reduceIndex = 0;
- for (int i = 0; i < NumInputDims; ++i) {
- if (m_reduced[i]) {
- m_reducedStrides[reduceIndex] = input_strides[i];
- ++reduceIndex;
- } else {
- m_preservedStrides[outputIndex] = input_strides[i];
- ++outputIndex;
- }
- }
- }
-
- // Special case for full reductions
- if (NumOutputDims == 0) {
- m_preservedStrides[0] = internal::array_prod(input_dims);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) {
- m_impl.evalSubExprsIfNeeded(NULL);
-
- // Use the FullReducer if possible.
- if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction &&
- internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
- ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
- !RunningOnGPU))) {
- bool need_assign = false;
- if (!data) {
- m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
- data = m_result;
- need_assign = true;
- }
- Op reducer(m_reducer);
- internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
- return need_assign;
- }
- else if(RunningOnSycl){
- const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
- const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
- m_result = data;
- }
- Op reducer(m_reducer);
- internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
- return (m_result != NULL);
- }
-
- // Attempt to use an optimized reduction.
- else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) {
- bool reducing_inner_dims = true;
- for (int i = 0; i < NumReducedDims; ++i) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- reducing_inner_dims &= m_reduced[i];
- } else {
- reducing_inner_dims &= m_reduced[NumInputDims - 1 - i];
- }
- }
- if (internal::InnerReducer<Self, Op, Device>::HasOptimizedImplementation &&
- (reducing_inner_dims || ReducingInnerMostDims)) {
- const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
- const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data) {
- if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
- m_result = data;
- }
- else {
- return true;
- }
- }
- Op reducer(m_reducer);
- if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
- if (m_result) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- return true;
- } else {
- return (m_result != NULL);
- }
- }
-
- bool preserving_inner_dims = true;
- for (int i = 0; i < NumReducedDims; ++i) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- preserving_inner_dims &= m_reduced[NumInputDims - 1 - i];
- } else {
- preserving_inner_dims &= m_reduced[i];
- }
- }
- if (internal::OuterReducer<Self, Op, Device>::HasOptimizedImplementation &&
- preserving_inner_dims) {
- const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
- const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data) {
- if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
- m_result = data;
- }
- else {
- return true;
- }
- }
- Op reducer(m_reducer);
- if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
- if (m_result) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- return true;
- } else {
- return (m_result != NULL);
- }
- }
- }
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- if (m_result) {
- m_device.deallocate(m_result);
- m_result = NULL;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) {
- return *(m_result + index);
- }
- Op reducer(m_reducer);
- if (ReducingInnerMostDims || RunningFullReduction) {
- const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
- return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
- num_values_to_reduce, reducer);
- } else {
- typename Self::CoeffReturnType accum = reducer.initialize();
- internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum);
- return reducer.finalize(accum);
- }
- }
-
- // TODO(bsteiner): provide a more efficient implementation.
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
-
- if (RunningOnGPU && m_result) {
- return internal::pload<PacketReturnType>(m_result + index);
- }
-
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- if (ReducingInnerMostDims) {
- const Index num_values_to_reduce =
- (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
- const Index firstIndex = firstInput(index);
- for (Index i = 0; i < PacketSize; ++i) {
- Op reducer(m_reducer);
- values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce,
- num_values_to_reduce, reducer);
- }
- } else if (PreservingInnerMostDims) {
- const Index firstIndex = firstInput(index);
- const int innermost_dim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : NumOutputDims - 1;
- // TBD: extend this the the n innermost dimensions that we preserve.
- if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) {
- Op reducer(m_reducer);
- typename Self::PacketReturnType accum = reducer.template initializePacket<typename Self::PacketReturnType>();
- internal::InnerMostDimPreserver<NumReducedDims-1, Self, Op>::reduce(*this, firstIndex, reducer, &accum);
- return reducer.finalizePacket(accum);
- } else {
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index + i);
- }
- }
- } else {
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index + i);
- }
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- // Must be called after evalSubExprsIfNeeded().
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- if (RunningFullReduction && m_result) {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
- } else {
- const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
- const double compute_cost = num_values_to_reduce * internal::functor_traits<Op>::Cost;
- return m_impl.costPerCoeff(vectorized) * num_values_to_reduce +
- TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
- }
-
- EIGEN_DEVICE_FUNC typename MakePointer_<Scalar>::Type data() const { return m_result; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
- /// added for sycl in order to construct the buffer from the sycl device
- const Device& device() const{return m_device;}
- /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel
- const Dims& xprDims() const {return m_xpr_dims;}
-
-
- private:
- template <int, typename, typename> friend struct internal::GenericDimReducer;
- template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
- template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
- template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
-#ifdef EIGEN_USE_THREADS
- template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
-#endif
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
- template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
-#ifdef EIGEN_HAS_CUDA_FP16
- template <typename S, typename R, typename I> friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*);
- template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*);
- template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*);
-#endif
- template <int NPT, typename S, typename R, typename I> friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
-
- template <int NPT, typename S, typename R, typename I> friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*);
-#endif
-
- template <typename S, typename O, typename D> friend struct internal::InnerReducer;
-
- // Returns the Index in the input tensor of the first value that needs to be
- // used to compute the reduction at output index "index".
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
- if (ReducingInnerMostDims) {
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return index * m_preservedStrides[0];
- } else {
- return index * m_preservedStrides[NumPreservedStrides - 1];
- }
- }
- // TBD: optimize the case where we preserve the innermost dimensions.
- Index startInput = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumOutputDims - 1; i > 0; --i) {
- // This is index_i in the output tensor.
- const Index idx = index / m_outputStrides[i];
- startInput += idx * m_preservedStrides[i];
- index -= idx * m_outputStrides[i];
- }
- if (PreservingInnerMostDims) {
- eigen_assert(m_preservedStrides[0] == 1);
- startInput += index;
- } else {
- startInput += index * m_preservedStrides[0];
- }
- } else {
- for (int i = 0; i < NumOutputDims - 1; ++i) {
- // This is index_i in the output tensor.
- const Index idx = index / m_outputStrides[i];
- startInput += idx * m_preservedStrides[i];
- index -= idx * m_outputStrides[i];
- }
- if (PreservingInnerMostDims) {
- eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1);
- startInput += index;
- } else {
- startInput += index * m_preservedStrides[NumPreservedStrides - 1];
- }
- }
- return startInput;
- }
-
- // Bitmap indicating if an input dimension is reduced or not.
- array<bool, NumInputDims> m_reduced;
- // Dimensions of the output of the operation.
- Dimensions m_dimensions;
- // Precomputed strides for the output tensor.
- array<Index, NumOutputDims> m_outputStrides;
- // Subset of strides of the input tensor for the non-reduced dimensions.
- // Indexed by output dimensions.
- static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
- array<Index, NumPreservedStrides> m_preservedStrides;
-
- // Subset of strides of the input tensor for the reduced dimensions.
- // Indexed by reduced dimensions.
- array<Index, NumReducedDims> m_reducedStrides;
- // Size of the input dimensions that are reduced.
- // Indexed by reduced dimensions.
- array<Index, NumReducedDims> m_reducedDims;
-
- // Evaluator for the input expression.
- TensorEvaluator<ArgType, Device> m_impl;
-
- // Operation to apply for computing the reduction.
- Op m_reducer;
-
- // For full reductions
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
- static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
- static const bool RunningOnSycl = false;
-#elif defined(EIGEN_USE_SYCL)
-static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value;
-static const bool RunningOnGPU = false;
-#else
- static const bool RunningOnGPU = false;
- static const bool RunningOnSycl = false;
-#endif
- typename MakePointer_<CoeffReturnType>::Type m_result;
-
- const Device& m_device;
- const Dims& m_xpr_dims;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
deleted file mode 100644
index 65638b6..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ /dev/null
@@ -1,750 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
-
-namespace Eigen {
-namespace internal {
-
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-// Full reducers for GPU, don't vectorize for now
-
-// Reducer function that enables multiple cuda thread to safely accumulate at the same
-// output address. It basically reads the current value of the output variable, and
-// attempts to update it with the new value. If in the meantime another cuda thread
-// updated the content of the output address it will try again.
-template <typename T, typename R>
-__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
-#if __CUDA_ARCH__ >= 300
- if (sizeof(T) == 4)
- {
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
- unsigned int newval = oldval;
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
- if (newval == oldval) {
- return;
- }
- unsigned int readback;
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
- oldval = readback;
- newval = oldval;
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
- if (newval == oldval) {
- return;
- }
- }
- }
- else if (sizeof(T) == 8) {
- unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
- unsigned long long newval = oldval;
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
- if (newval == oldval) {
- return;
- }
- unsigned long long readback;
- while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
- oldval = readback;
- newval = oldval;
- reducer.reduce(accum, reinterpret_cast<T*>(&newval));
- if (newval == oldval) {
- return;
- }
- }
- }
- else {
- assert(0 && "Wordsize not supported");
- }
-#else
- assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
-
-// We extend atomicExch to support extra data types
-template <typename Type>
-__device__ inline Type atomicExchCustom(Type* address, Type val) {
- return atomicExch(address, val);
-}
-
-template <>
-__device__ inline double atomicExchCustom(double* address, double val) {
- unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
- return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
-}
-
-#ifdef EIGEN_HAS_CUDA_FP16
-template <template <typename T> class R>
-__device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) {
- unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
- unsigned int newval = oldval;
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
- if (newval == oldval) {
- return;
- }
- unsigned int readback;
- while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
- oldval = readback;
- newval = oldval;
- reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
- if (newval == oldval) {
- return;
- }
- }
-}
-#endif
-
-template <>
-__device__ inline void atomicReduce(float* output, float accum, SumReducer<float>&) {
-#if __CUDA_ARCH__ >= 300
- atomicAdd(output, accum);
-#else
- assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
-
-
-template <typename CoeffType, typename Index>
-__global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coeffs, CoeffType* output) {
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
- const Index num_threads = blockDim.x * gridDim.x;
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
- output[i] = val;
- }
-}
-
-
-template <int BlockSize, int NumPerThread, typename Self,
- typename Reducer, typename Index>
-__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
- typename Self::CoeffReturnType* output, unsigned int* semaphore) {
-#if __CUDA_ARCH__ >= 300
- // Initialize the output value
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
- if (gridDim.x == 1) {
- if (first_index == 0) {
- *output = reducer.initialize();
- }
- }
- else {
- if (threadIdx.x == 0) {
- unsigned int block = atomicCAS(semaphore, 0u, 1u);
- if (block == 0) {
- // We're the first block to run, initialize the output value
- atomicExchCustom(output, reducer.initialize());
- __threadfence();
- atomicExch(semaphore, 2u);
- }
- else {
- // Wait for the first block to initialize the output value.
- // Use atomicCAS here to ensure that the reads aren't cached
- unsigned int val;
- do {
- val = atomicCAS(semaphore, 2u, 2u);
- }
- while (val < 2u);
- }
- }
- }
-
- __syncthreads();
-
- eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
-
- typename Self::CoeffReturnType accum = reducer.initialize();
- Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
- for (Index i = 0; i < max_iter; i+=BlockSize) {
- const Index index = first_index + i;
- eigen_assert(index < num_coeffs);
- typename Self::CoeffReturnType val = input.m_impl.coeff(index);
- reducer.reduce(val, &accum);
- }
-
-#pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- reducer.reduce(__shfl_down(accum, offset, warpSize), &accum);
- }
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- atomicReduce(output, accum, reducer);
- }
-
- if (gridDim.x > 1 && threadIdx.x == 0) {
- // Let the last block reset the semaphore
- atomicInc(semaphore, gridDim.x + 1);
- }
-#else
- assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
-
-
-#ifdef EIGEN_HAS_CUDA_FP16
-template <typename Self,
- typename Reducer, typename Index>
-__global__ void ReductionInitFullReduxKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half2* scratch) {
- eigen_assert(blockDim.x == 1);
- eigen_assert(gridDim.x == 1);
- if (num_coeffs % 2 != 0) {
- half last = input.m_impl.coeff(num_coeffs-1);
- *scratch = __halves2half2(last, reducer.initialize());
- } else {
- *scratch = reducer.template initializePacket<half2>();
- }
-}
-
-template <typename Self,
- typename Reducer, typename Index>
-__global__ void ReductionInitKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs, half* output) {
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
- const Index num_threads = blockDim.x * gridDim.x;
- const Index num_packets = num_coeffs / 2;
- for (Index i = thread_id; i < num_packets; i += num_threads) {
- ((half2*)output)[i] = reducer.template initializePacket<half2>();
- }
-
- if (thread_id == 0 && num_coeffs % 2 != 0) {
- output[num_coeffs-1] = reducer.initialize();
- }
-}
-
-template <int BlockSize, int NumPerThread, typename Self,
- typename Reducer, typename Index>
-__global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs,
- half* output, half2* scratch) {
- eigen_assert(NumPerThread % 2 == 0);
-
- const Index first_index = blockIdx.x * BlockSize * NumPerThread + 2*threadIdx.x;
-
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
- if (gridDim.x == 1 && first_index == 0) {
- if (num_coeffs % 2 != 0) {
- half last = input.m_impl.coeff(num_coeffs-1);
- *scratch = __halves2half2(last, reducer.initialize());
- } else {
- *scratch = reducer.template initializePacket<half2>();
- }
- __syncthreads();
- }
-
- half2 accum = reducer.template initializePacket<half2>();
- const Index max_iter = numext::mini<Index>((num_coeffs - first_index) / 2, NumPerThread*BlockSize / 2);
- for (Index i = 0; i < max_iter; i += BlockSize) {
- const Index index = first_index + 2*i;
- eigen_assert(index + 1 < num_coeffs);
- half2 val = input.m_impl.template packet<Unaligned>(index);
- reducer.reducePacket(val, &accum);
- }
-
-#pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- reducer.reducePacket(__shfl_down(accum, offset, warpSize), &accum);
- }
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- atomicReduce(scratch, accum, reducer);
- }
-
- __syncthreads();
-
- if (gridDim.x == 1 && first_index == 0) {
- half tmp = __low2half(*scratch);
- reducer.reduce(__high2half(*scratch), &tmp);
- *output = tmp;
- }
-}
-
-template <typename Op>
-__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
- eigen_assert(threadIdx.x == 1);
- half tmp = __low2half(*scratch);
- reducer.reduce(__high2half(*scratch), &tmp);
- *output = tmp;
-}
-
-#endif
-
-template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
-struct FullReductionLauncher {
- static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
- assert(false && "Should only be called on doubles, floats and half floats");
- }
-};
-
-// Specialization for float and double
-template <typename Self, typename Op, typename OutputType, bool PacketAccess>
-struct FullReductionLauncher<
- Self, Op, OutputType, PacketAccess,
- typename internal::enable_if<
- internal::is_same<float, OutputType>::value ||
- internal::is_same<double, OutputType>::value,
- void>::type> {
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
- typedef typename Self::Index Index;
- typedef typename Self::CoeffReturnType Scalar;
- const int block_size = 256;
- const int num_per_thread = 128;
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
-
- unsigned int* semaphore = NULL;
- if (num_blocks > 1) {
- semaphore = device.semaphore();
- }
-
- LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
- }
-};
-
-#ifdef EIGEN_HAS_CUDA_FP16
-template <typename Self, typename Op>
-struct FullReductionLauncher<Self, Op, Eigen::half, false> {
- static void run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index) {
- assert(false && "Should not be called since there is no packet accessor");
- }
-};
-
-template <typename Self, typename Op>
-struct FullReductionLauncher<Self, Op, Eigen::half, true> {
- static void run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs) {
- typedef typename Self::Index Index;
-
- const int block_size = 256;
- const int num_per_thread = 128;
- const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- half2* scratch = static_cast<half2*>(device.scratchpad());
-
- if (num_blocks > 1) {
- // We initialize the output and the scrathpad outside the reduction kernel when we can't be sure that there
- // won't be a race conditions between multiple thread blocks.
- LAUNCH_CUDA_KERNEL((ReductionInitFullReduxKernelHalfFloat<Self, Op, Index>),
- 1, 1, 0, device, reducer, self, num_coeffs, scratch);
- }
-
- LAUNCH_CUDA_KERNEL((FullReductionKernelHalfFloat<block_size, num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, scratch);
-
- if (num_blocks > 1) {
- LAUNCH_CUDA_KERNEL((ReductionCleanupKernelHalfFloat<Op>),
- 1, 1, 0, device, reducer, output, scratch);
- }
- }
-};
-#endif
-
-
-template <typename Self, typename Op, bool Vectorizable>
-struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple cases
- // of doubles, floats and half floats
-#ifdef EIGEN_HAS_CUDA_FP16
- static const bool HasOptimizedImplementation = !Op::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
- static const bool HasOptimizedImplementation = !Op::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
-#endif
-
- template <typename OutputType>
- static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
- assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
- // Don't crash when we're called with an input tensor of size 0.
- if (num_coeffs == 0) {
- return;
- }
-
- FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
- }
-};
-
-
-template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
-__global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- typename Self::CoeffReturnType* output) {
-#if __CUDA_ARCH__ >= 300
- typedef typename Self::CoeffReturnType Type;
- eigen_assert(blockDim.y == 1);
- eigen_assert(blockDim.z == 1);
- eigen_assert(gridDim.y == 1);
- eigen_assert(gridDim.z == 1);
-
- const int unroll_times = 16;
- eigen_assert(NumPerThread % unroll_times == 0);
-
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread);
- const Index num_input_blocks = input_col_blocks * num_preserved_coeffs;
-
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
- const Index row = i / input_col_blocks;
-
- if (row < num_preserved_coeffs) {
- const Index col_block = i % input_col_blocks;
- const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
-
- Type reduced_val = reducer.initialize();
-
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
- if (last_col >= num_coeffs_to_reduce) {
- for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
- const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
- reducer.reduce(val, &reduced_val);
- }
- break;
- } else {
- // Faster version of the loop with no branches after unrolling.
-#pragma unroll
- for (int k = 0; k < unroll_times; ++k) {
- const Index col = col_begin + blockDim.x * (j + k);
- reducer.reduce(input.m_impl.coeff(row * num_coeffs_to_reduce + col), &reduced_val);
- }
- }
- }
-
-#pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- reducer.reduce(__shfl_down(reduced_val, offset), &reduced_val);
- }
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- atomicReduce(&(output[row]), reduced_val, reducer);
- }
- }
- }
-#else
- assert(0 && "Shouldn't be called on unsupported device");
-#endif
-}
-
-#ifdef EIGEN_HAS_CUDA_FP16
-
-template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
-__global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- half* output) {
- eigen_assert(blockDim.y == 1);
- eigen_assert(blockDim.z == 1);
- eigen_assert(gridDim.y == 1);
- eigen_assert(gridDim.z == 1);
-
- const int unroll_times = 16;
- eigen_assert(NumPerThread % unroll_times == 0);
- eigen_assert(unroll_times % 2 == 0);
-
- const Index input_col_blocks = divup<Index>(num_coeffs_to_reduce, blockDim.x * NumPerThread * 2);
- const Index num_input_blocks = divup<Index>(input_col_blocks * num_preserved_coeffs, 2);
-
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
-
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- Index i = 2*thread_id;
- for (; i + 1 < num_preserved_coeffs; i += 2*num_threads) {
- half* loc = output + i;
- *((half2*)loc) = reducer.template initializePacket<half2>();
- }
- if (i < num_preserved_coeffs) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
- const Index row = 2 * (i / input_col_blocks);
-
- if (row + 1 < num_preserved_coeffs) {
- const Index col_block = i % input_col_blocks;
- const Index col_begin = 2 * (col_block * blockDim.x * NumPerThread + threadIdx.x);
-
- half2 reduced_val1 = reducer.template initializePacket<half2>();
- half2 reduced_val2 = reducer.template initializePacket<half2>();
-
- for (Index j = 0; j < NumPerThread; j += unroll_times) {
- const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1) * 2;
- if (last_col >= num_coeffs_to_reduce) {
- Index col = col_begin + blockDim.x * j;
- for (; col + 1 < num_coeffs_to_reduce; col += blockDim.x) {
- const half2 val1 = input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col);
- reducer.reducePacket(val1, &reduced_val1);
- const half2 val2 = input.m_impl.template packet<Unaligned>((row+1) * num_coeffs_to_reduce + col);
- reducer.reducePacket(val2, &reduced_val2);
- }
- if (col < num_coeffs_to_reduce) {
- // Peel;
- const half last1 = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
- const half2 val1 = __halves2half2(last1, reducer.initialize());
- reducer.reducePacket(val1, &reduced_val1);
- const half last2 = input.m_impl.coeff((row+1) * num_coeffs_to_reduce + col);
- const half2 val2 = __halves2half2(last2, reducer.initialize());
- reducer.reducePacket(val2, &reduced_val2);
- }
- break;
- } else {
- // Faster version of the loop with no branches after unrolling.
-#pragma unroll
- for (int k = 0; k < unroll_times; ++k) {
- const Index col = col_begin + blockDim.x * (j + k) * 2;
- reducer.reducePacket(input.m_impl.template packet<Unaligned>(row * num_coeffs_to_reduce + col), &reduced_val1);
- reducer.reducePacket(input.m_impl.template packet<Unaligned>((row + 1)* num_coeffs_to_reduce + col), &reduced_val2);
- }
- }
- }
-
-#pragma unroll
- for (int offset = warpSize/2; offset > 0; offset /= 2) {
- reducer.reducePacket(__shfl_down(reduced_val1, offset, warpSize), &reduced_val1);
- reducer.reducePacket(__shfl_down(reduced_val2, offset, warpSize), &reduced_val2);
- }
-
- half val1 = __low2half(reduced_val1);
- reducer.reduce(__high2half(reduced_val1), &val1);
- half val2 = __low2half(reduced_val2);
- reducer.reduce(__high2half(reduced_val2), &val2);
- half2 val = __halves2half2(val1, val2);
-
- if ((threadIdx.x & (warpSize - 1)) == 0) {
- half* loc = output + row;
- atomicReduce((half2*)loc, val, reducer);
- }
- }
- }
-}
-
-#endif
-
-template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
-struct InnerReductionLauncher {
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
- assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
- return true;
- }
-};
-
-// Specialization for float and double
-template <typename Self, typename Op, typename OutputType, bool PacketAccess>
-struct InnerReductionLauncher<
- Self, Op, OutputType, PacketAccess,
- typename internal::enable_if<
- internal::is_same<float, OutputType>::value ||
- internal::is_same<double, OutputType>::value,
- void>::type> {
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = 256;
- const int num_per_thread = 128;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be a race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_CUDA_KERNEL((ReductionInitKernel<OutputType, Index>),
- num_blocks, 1024, 0, device, reducer.initialize(),
- num_preserved_vals, output);
- }
-
- LAUNCH_CUDA_KERNEL((InnerReductionKernel<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
-};
-
-#ifdef EIGEN_HAS_CUDA_FP16
-template <typename Self, typename Op>
-struct InnerReductionLauncher<Self, Op, Eigen::half, false> {
- static bool run(const Self&, Op&, const GpuDevice&, half*, typename Self::Index, typename Self::Index) {
- assert(false && "Should not be called since there is no packet accessor");
- return true;
- }
-};
-
-template <typename Self, typename Op>
-struct InnerReductionLauncher<Self, Op, Eigen::half, true> {
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, half* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- if (num_preserved_vals % 2 != 0) {
- // Not supported yet, revert to the slower code path
- return true;
- }
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = /*256*/128;
- const int num_per_thread = /*128*/64;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be a race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_CUDA_KERNEL((ReductionInitKernelHalfFloat<Self, Op, Index>),
- 1, 1, 0, device, reducer, self, num_preserved_vals, output);
- }
-
- LAUNCH_CUDA_KERNEL((InnerReductionKernelHalfFloat<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
-};
-#endif
-
-
-template <typename Self, typename Op>
-struct InnerReducer<Self, Op, GpuDevice> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple case
- // of floats and half floats.
-#ifdef EIGEN_HAS_CUDA_FP16
- static const bool HasOptimizedImplementation = !Op::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#else
- static const bool HasOptimizedImplementation = !Op::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
-#endif
-
- template <typename OutputType>
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
- const Index num_coeffs = array_prod(self.m_impl.dimensions());
- // Don't crash when we're called with an input tensor of size 0.
- if (num_coeffs == 0) {
- return true;
- }
- // It's faster to use the usual code.
- if (num_coeffs_to_reduce <= 128) {
- return true;
- }
-
- return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
- }
-};
-
-template <int NumPerThread, typename Self,
- typename Reducer, typename Index>
-__global__ void OuterReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
- typename Self::CoeffReturnType* output) {
- const Index num_threads = blockDim.x * gridDim.x;
- const Index thread_id = blockIdx.x * blockDim.x + threadIdx.x;
- // Initialize the output values if they weren't initialized by the ReductionInitKernel
- if (gridDim.x == 1) {
- for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
- output[i] = reducer.initialize();
- }
- __syncthreads();
- }
-
- // Do the reduction.
- const Index max_iter = num_preserved_coeffs * divup<Index>(num_coeffs_to_reduce, NumPerThread);
- for (Index i = thread_id; i < max_iter; i += num_threads) {
- const Index input_col = i % num_preserved_coeffs;
- const Index input_row = (i / num_preserved_coeffs) * NumPerThread;
- typename Self::CoeffReturnType reduced_val = reducer.initialize();
- const Index max_row = numext::mini(input_row + NumPerThread, num_coeffs_to_reduce);
- for (Index j = input_row; j < max_row; j++) {
- typename Self::CoeffReturnType val = input.m_impl.coeff(j * num_preserved_coeffs + input_col);
- reducer.reduce(val, &reduced_val);
- }
- atomicReduce(&(output[input_col]), reduced_val, reducer);
- }
-}
-
-
-template <typename Self, typename Op>
-struct OuterReducer<Self, Op, GpuDevice> {
- // Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple case
- // of floats.
- static const bool HasOptimizedImplementation = !Op::IsStateful &&
- (internal::is_same<typename Self::CoeffReturnType, float>::value ||
- internal::is_same<typename Self::CoeffReturnType, double>::value);
- template <typename Device, typename OutputType>
- static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
- assert(false && "Should only be called to reduce doubles or floats on a gpu device");
- return true;
- }
-
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- typedef typename Self::Index Index;
-
- // It's faster to use the usual code.
- if (num_coeffs_to_reduce <= 32) {
- return true;
- }
-
- const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
- const int block_size = 256;
- const int num_per_thread = 16;
- const int dyn_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / block_size;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
-
- if (num_blocks > 1) {
- // We initialize the outputs in the reduction kernel itself when we don't have to worry
- // about race conditions between multiple thread blocks.
- const int dyn_blocks = divup<int>(num_preserved_vals, 1024);
- const int max_blocks = device.getNumCudaMultiProcessors() *
- device.maxCudaThreadsPerMultiProcessor() / 1024;
- const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_CUDA_KERNEL((ReductionInitKernel<float, Index>),
- num_blocks, 1024, 0, device, reducer.initialize(),
- num_preserved_vals, output);
- }
-
- LAUNCH_CUDA_KERNEL((OuterReductionKernel<num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs_to_reduce, num_preserved_vals, output);
-
- return false;
- }
-};
-
-#endif
-
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
deleted file mode 100644
index 3daecb0..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
+++ /dev/null
@@ -1,242 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclPlaceHolderExpr.h
- *
- * \brief:
- * This is the specialisation of the placeholder expression based on the
- * operation type
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
-
-namespace Eigen {
-namespace internal {
-
-template<typename CoeffReturnType, typename KernelName> struct syclGenericBufferReducer{
-template<typename BufferTOut, typename BufferTIn>
-static void run(BufferTOut* bufOut, BufferTIn& bufI, const Eigen::SyclDevice& dev, size_t length, size_t local){
- do {
- auto f = [length, local, bufOut, &bufI](cl::sycl::handler& h) mutable {
- cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)},
- cl::sycl::range<1>{std::min(length, local)}};
- /* Two accessors are used: one to the buffer that is being reduced,
- * and a second to local memory, used to store intermediate data. */
- auto aI =
- bufI.template get_access<cl::sycl::access::mode::read_write>(h);
- auto aOut =
- bufOut->template get_access<cl::sycl::access::mode::discard_write>(h);
- cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write,
- cl::sycl::access::target::local>
- scratch(cl::sycl::range<1>(local), h);
-
- /* The parallel_for invocation chosen is the variant with an nd_item
- * parameter, since the code requires barriers for correctness. */
- h.parallel_for<KernelName>(
- r, [aOut, aI, scratch, local, length](cl::sycl::nd_item<1> id) {
- size_t globalid = id.get_global(0);
- size_t localid = id.get_local(0);
- /* All threads collectively read from global memory into local.
- * The barrier ensures all threads' IO is resolved before
- * execution continues (strictly speaking, all threads within
- * a single work-group - there is no co-ordination between
- * work-groups, only work-items). */
- if (globalid < length) {
- scratch[localid] = aI[globalid];
- }
- id.barrier(cl::sycl::access::fence_space::local_space);
-
- /* Apply the reduction operation between the current local
- * id and the one on the other half of the vector. */
- if (globalid < length) {
- int min = (length < local) ? length : local;
- for (size_t offset = min / 2; offset > 0; offset /= 2) {
- if (localid < offset) {
- scratch[localid] += scratch[localid + offset];
- }
- id.barrier(cl::sycl::access::fence_space::local_space);
- }
- /* The final result will be stored in local id 0. */
- if (localid == 0) {
- aI[id.get_group(0)] = scratch[localid];
- if((length<=local) && globalid ==0){
- aOut[globalid]=scratch[localid];
- }
- }
- }
- });
- };
- dev.m_queue.submit(f);
- dev.m_queue.throw_asynchronous();
-
- /* At this point, you could queue::wait_and_throw() to ensure that
- * errors are caught quickly. However, this would likely impact
- * performance negatively. */
- length = length / local;
-
- } while (length > 1);
-
-
-
-}
-
-};
-
-/// For now let's start with a full reducer
-/// Self is useless here because in expression construction we are going to treat reduction as a leafnode.
-/// we want to take reduction child and then build a construction and apply the full reducer function on it. Fullreducre applies the
-/// reduction operation on the child of the reduction. once it is done the reduction is an empty shell and can be thrown away and treated as
-// a leafNode.
-template <typename Self, typename Op, bool Vectorizable>
-struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> {
-
- typedef typename Self::CoeffReturnType CoeffReturnType;
- static const bool HasOptimizedImplementation = false;
-
- static void run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output) {
- typedef const typename Self::ChildType HostExpr; /// this is the child of reduction
- typedef typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr;
- auto functors = TensorSycl::internal::extractFunctors(self.impl());
- int red_factor =256; /// initial reduction. If the size is less than red_factor we only creates one thread.
- size_t inputSize =self.impl().dimensions().TotalSize();
- size_t rng = inputSize/red_factor; // the total number of thread initially is half the size of the input
- size_t remaining = inputSize% red_factor;
- if(rng ==0) {
- red_factor=1;
- };
- size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
- size_t GRange=std::max((size_t )1, rng);
-
- // convert global range to power of 2 for redecution
- GRange--;
- GRange |= GRange >> 1;
- GRange |= GRange >> 2;
- GRange |= GRange >> 4;
- GRange |= GRange >> 8;
- GRange |= GRange >> 16;
-#if __x86_64__ || __ppc64__ || _WIN64
- GRange |= GRange >> 32;
-#endif
- GRange++;
- size_t outTileSize = tileSize;
- /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one.
- if (GRange < outTileSize) outTileSize=GRange;
- // getting final out buffer at the moment the created buffer is true because there is no need for assign
- auto out_buffer =dev.template get_sycl_buffer<typename Eigen::internal::remove_all<CoeffReturnType>::type>(self.dimensions().TotalSize(), output);
- /// creating the shared memory for calculating reduction.
- /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
- /// recursively apply reduction on it in order to reduce the whole.
- auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange));
- typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims;
- Dims dims= self.xprDims();
- Op functor = reducer;
- dev.m_queue.submit([&](cl::sycl::handler &cgh) {
- // create a tuple of accessors from Evaluator
- auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl());
- auto tmp_global_accessor = temp_global_buffer. template get_access<cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>(cgh);
-
- cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(outTileSize)), [=](cl::sycl::nd_item<1> itemID) {
- typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
- auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
- /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
- /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
- /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
- const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
- /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
- /// the device_evaluator is detectable and recognisable on the device.
- auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice());
- /// const cast added as a naive solution to solve the qualifier drop error
- auto globalid=itemID.get_global_linear_id();
-
- if(globalid<rng)
- tmp_global_accessor.get_pointer()[globalid]=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*globalid, red_factor, const_cast<Op&>(functor));
- else
- tmp_global_accessor.get_pointer()[globalid]=static_cast<CoeffReturnType>(0);
-
- if(remaining!=0 && globalid==0 )
- // this will add the rest of input buffer when the input size is not devidable to red_factor.
- tmp_global_accessor.get_pointer()[globalid]+=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, red_factor*(rng), remaining, const_cast<Op&>(functor));
- });
- });
- dev.m_queue.throw_asynchronous();
-
-/// This is used to recursively reduce the tmp value to an element of 1;
- syclGenericBufferReducer<CoeffReturnType,HostExpr>::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize);
- }
-
-};
-
-template <typename Self, typename Op>
-struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
-
- typedef typename Self::CoeffReturnType CoeffReturnType;
- static const bool HasOptimizedImplementation = false;
-
- static bool run(const Self& self, Op& reducer, const Eigen::SyclDevice& dev, CoeffReturnType* output, typename Self::Index , typename Self::Index num_coeffs_to_preserve) {
- typedef const typename Self::ChildType HostExpr; /// this is the child of reduction
- typedef typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr;
- auto functors = TensorSycl::internal::extractFunctors(self.impl());
-
- size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
-
- size_t GRange=num_coeffs_to_preserve;
- if (tileSize>GRange) tileSize=GRange;
- else if(GRange>tileSize){
- size_t xMode = GRange % tileSize;
- if (xMode != 0) GRange += (tileSize - xMode);
- }
- // getting final out buffer at the moment the created buffer is true because there is no need for assign
- /// creating the shared memory for calculating reduction.
- /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
- /// recursively apply reduction on it in order to reduce the whole.
- typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims;
- Dims dims= self.xprDims();
- Op functor = reducer;
-
- dev.m_queue.submit([&](cl::sycl::handler &cgh) {
- // create a tuple of accessors from Evaluator
- auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl());
- auto output_accessor = dev.template get_sycl_accessor<cl::sycl::access::mode::discard_write>(num_coeffs_to_preserve,cgh, output);
-
- cgh.parallel_for<Self>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
- typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
- auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
- /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
- /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
- /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
- const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
- /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
- /// the device_evaluator is detectable and recognisable on the device.
- typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeiceSelf;
- auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice());
- /// const cast added as a naive solution to solve the qualifier drop error
- auto globalid=itemID.get_global_linear_id();
- if (globalid< static_cast<size_t>(num_coeffs_to_preserve)) {
- typename DeiceSelf::CoeffReturnType accum = functor.initialize();
- GenericDimReducer<DeiceSelf::NumReducedDims-1, DeiceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(globalid),const_cast<Op&>(functor), &accum);
- functor.finalize(accum);
- output_accessor.get_pointer()[globalid]= accum;
- }
- });
- });
- dev.m_queue.throw_asynchronous();
- return false;
- }
-};
-
-} // end namespace internal
-} // namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
deleted file mode 100644
index 99245f7..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h
+++ /dev/null
@@ -1,429 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REF_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REF_H
-
-namespace Eigen {
-
-namespace internal {
-
-template <typename Dimensions, typename Scalar>
-class TensorLazyBaseEvaluator {
- public:
- TensorLazyBaseEvaluator() : m_refcount(0) { }
- virtual ~TensorLazyBaseEvaluator() { }
-
- EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const = 0;
- EIGEN_DEVICE_FUNC virtual const Scalar* data() const = 0;
-
- EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const = 0;
- EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) = 0;
-
- void incrRefCount() { ++m_refcount; }
- void decrRefCount() { --m_refcount; }
- int refCount() const { return m_refcount; }
-
- private:
- // No copy, no assigment;
- TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other);
- TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other);
-
- int m_refcount;
-};
-
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, typename TensorEvaluator<Expr, Device>::Scalar> {
- public:
- // typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions;
- typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar;
-
- TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
- m_dims = m_impl.dimensions();
- m_impl.evalSubExprsIfNeeded(NULL);
- }
- virtual ~TensorLazyEvaluatorReadOnly() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC virtual const Dimensions& dimensions() const {
- return m_dims;
- }
- EIGEN_DEVICE_FUNC virtual const Scalar* data() const {
- return m_impl.data();
- }
-
- EIGEN_DEVICE_FUNC virtual const Scalar coeff(DenseIndex index) const {
- return m_impl.coeff(index);
- }
- EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex /*index*/) {
- eigen_assert(false && "can't reference the coefficient of a rvalue");
- return m_dummy;
- };
-
- protected:
- TensorEvaluator<Expr, Device> m_impl;
- Dimensions m_dims;
- Scalar m_dummy;
-};
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> {
- public:
- typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base;
- typedef typename Base::Scalar Scalar;
-
- TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) {
- }
- virtual ~TensorLazyEvaluatorWritable() {
- }
-
- EIGEN_DEVICE_FUNC virtual Scalar& coeffRef(DenseIndex index) {
- return this->m_impl.coeffRef(index);
- }
-};
-
-template <typename Dimensions, typename Expr, typename Device>
-class TensorLazyEvaluator : public internal::conditional<bool(internal::is_lvalue<Expr>::value),
- TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
- TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type {
- public:
- typedef typename internal::conditional<bool(internal::is_lvalue<Expr>::value),
- TensorLazyEvaluatorWritable<Dimensions, Expr, Device>,
- TensorLazyEvaluatorReadOnly<Dimensions, const Expr, Device> >::type Base;
- typedef typename Base::Scalar Scalar;
-
- TensorLazyEvaluator(const Expr& expr, const Device& device) : Base(expr, device) {
- }
- virtual ~TensorLazyEvaluator() {
- }
-};
-
-} // namespace internal
-
-
-/** \class TensorRef
- * \ingroup CXX11_Tensor_Module
- *
- * \brief A reference to a tensor expression
- * The expression will be evaluated lazily (as much as possible).
- *
- */
-template<typename PlainObjectType> class TensorRef : public TensorBase<TensorRef<PlainObjectType> >
-{
- public:
- typedef TensorRef<PlainObjectType> Self;
- typedef typename PlainObjectType::Base Base;
- typedef typename Eigen::internal::nested<Self>::type Nested;
- typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
- typedef typename internal::traits<PlainObjectType>::Index Index;
- typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
- typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename Base::CoeffReturnType CoeffReturnType;
- typedef Scalar* PointerType;
- typedef PointerType PointerArgType;
-
- static const Index NumIndices = PlainObjectType::NumIndices;
- typedef typename PlainObjectType::Dimensions Dimensions;
-
- enum {
- IsAligned = false,
- PacketAccess = false,
- Layout = PlainObjectType::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_STRONG_INLINE TensorRef() : m_evaluator(NULL) {
- }
-
- template <typename Expression>
- EIGEN_STRONG_INLINE TensorRef(const Expression& expr) : m_evaluator(new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice())) {
- m_evaluator->incrRefCount();
- }
-
- template <typename Expression>
- EIGEN_STRONG_INLINE TensorRef& operator = (const Expression& expr) {
- unrefEvaluator();
- m_evaluator = new internal::TensorLazyEvaluator<Dimensions, Expression, DefaultDevice>(expr, DefaultDevice());
- m_evaluator->incrRefCount();
- return *this;
- }
-
- ~TensorRef() {
- unrefEvaluator();
- }
-
- TensorRef(const TensorRef& other) : m_evaluator(other.m_evaluator) {
- eigen_assert(m_evaluator->refCount() > 0);
- m_evaluator->incrRefCount();
- }
-
- TensorRef& operator = (const TensorRef& other) {
- if (this != &other) {
- unrefEvaluator();
- m_evaluator = other.m_evaluator;
- eigen_assert(m_evaluator->refCount() > 0);
- m_evaluator->incrRefCount();
- }
- return *this;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index rank() const { return m_evaluator->dimensions().size(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_evaluator->dimensions()[n]; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_evaluator->dimensions(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Index size() const { return m_evaluator->dimensions().TotalSize(); }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar* data() const { return m_evaluator->data(); }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index index) const
- {
- return m_evaluator->coeff(index);
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index firstIndex, IndexTypes... otherIndices) const
- {
- const std::size_t num_indices = (sizeof...(otherIndices) + 1);
- const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
- return coeff(indices);
- }
- template<typename... IndexTypes> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices)
- {
- const std::size_t num_indices = (sizeof...(otherIndices) + 1);
- const array<Index, num_indices> indices{{firstIndex, otherIndices...}};
- return coeffRef(indices);
- }
-#else
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1) const
- {
- array<Index, 2> indices;
- indices[0] = i0;
- indices[1] = i1;
- return coeff(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2) const
- {
- array<Index, 3> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- return coeff(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3) const
- {
- array<Index, 4> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- indices[3] = i3;
- return coeff(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
- {
- array<Index, 5> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- indices[3] = i3;
- indices[4] = i4;
- return coeff(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1)
- {
- array<Index, 2> indices;
- indices[0] = i0;
- indices[1] = i1;
- return coeffRef(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2)
- {
- array<Index, 3> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- return coeffRef(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
- {
- array<Index, 4> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- indices[3] = i3;
- return coeffRef(indices);
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index i0, Index i1, Index i2, Index i3, Index i4)
- {
- array<Index, 5> indices;
- indices[0] = i0;
- indices[1] = i1;
- indices[2] = i2;
- indices[3] = i3;
- indices[4] = i4;
- return coeffRef(indices);
- }
-#endif
-
- template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(const array<Index, NumIndices>& indices) const
- {
- const Dimensions& dims = this->dimensions();
- Index index = 0;
- if (PlainObjectType::Options & RowMajor) {
- index += indices[0];
- for (size_t i = 1; i < NumIndices; ++i) {
- index = index * dims[i] + indices[i];
- }
- } else {
- index += indices[NumIndices-1];
- for (int i = NumIndices-2; i >= 0; --i) {
- index = index * dims[i] + indices[i];
- }
- }
- return m_evaluator->coeff(index);
- }
- template <std::size_t NumIndices> EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(const array<Index, NumIndices>& indices)
- {
- const Dimensions& dims = this->dimensions();
- Index index = 0;
- if (PlainObjectType::Options & RowMajor) {
- index += indices[0];
- for (size_t i = 1; i < NumIndices; ++i) {
- index = index * dims[i] + indices[i];
- }
- } else {
- index += indices[NumIndices-1];
- for (int i = NumIndices-2; i >= 0; --i) {
- index = index * dims[i] + indices[i];
- }
- }
- return m_evaluator->coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
- {
- return m_evaluator->coeff(index);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- return m_evaluator->coeffRef(index);
- }
-
- private:
- EIGEN_STRONG_INLINE void unrefEvaluator() {
- if (m_evaluator) {
- m_evaluator->decrRefCount();
- if (m_evaluator->refCount() == 0) {
- delete m_evaluator;
- }
- }
- }
-
- internal::TensorLazyBaseEvaluator<Dimensions, Scalar>* m_evaluator;
-};
-
-
-// evaluator for rvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<const TensorRef<Derived>, Device>
-{
- typedef typename Derived::Index Index;
- typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename Derived::Dimensions Dimensions;
-
- enum {
- IsAligned = false,
- PacketAccess = false,
- Layout = TensorRef<Derived>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const TensorRef<Derived>& m, const Device&)
- : m_ref(m)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- return m_ref.coeff(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
- return m_ref.coeffRef(index);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return m_ref.data(); }
-
- protected:
- TensorRef<Derived> m_ref;
-};
-
-
-// evaluator for lvalues
-template<typename Derived, typename Device>
-struct TensorEvaluator<TensorRef<Derived>, Device> : public TensorEvaluator<const TensorRef<Derived>, Device>
-{
- typedef typename Derived::Index Index;
- typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Scalar CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef typename Derived::Dimensions Dimensions;
-
- typedef TensorEvaluator<const TensorRef<Derived>, Device> Base;
-
- enum {
- IsAligned = false,
- PacketAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(TensorRef<Derived>& m, const Device& d) : Base(m, d)
- { }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
- return this->m_ref.coeffRef(index);
- }
-};
-
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REF_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
deleted file mode 100644
index 14e392e..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ /dev/null
@@ -1,288 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
-#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
-namespace Eigen {
-
-/** \class TensorReverse
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor reverse elements class.
- *
- */
-namespace internal {
-template<typename ReverseDimensions, typename XprType>
-struct traits<TensorReverseOp<ReverseDimensions,
- XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename ReverseDimensions, typename XprType>
-struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
-{
- typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
-};
-
-template<typename ReverseDimensions, typename XprType>
-struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
- typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
-{
- typedef TensorReverseOp<ReverseDimensions, XprType> type;
-};
-
-} // end namespace internal
-
-template<typename ReverseDimensions, typename XprType>
-class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
- XprType>, WriteAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
- StorageKind;
- typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
- const XprType& expr, const ReverseDimensions& reverse_dims)
- : m_xpr(expr), m_reverse_dims(reverse_dims) { }
-
- EIGEN_DEVICE_FUNC
- const ReverseDimensions& reverse() const { return m_reverse_dims; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other)
- {
- typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const ReverseDimensions m_reverse_dims;
-};
-
-// Eval as rvalue
-template<typename ReverseDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
-{
- typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<ReverseDimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
- : m_impl(op.expression(), device), m_reverse(op.reverse())
- {
- // Reversing a scalar isn't supported yet. It would be a no-op anyway.
- EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- // Compute strides
- m_dimensions = m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_strides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
- }
- } else {
- m_strides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
- Index index) const {
- eigen_assert(index < dimensions().TotalSize());
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- Index idx = index / m_strides[i];
- index -= idx * m_strides[i];
- if (m_reverse[i]) {
- idx = m_dimensions[i] - idx - 1;
- }
- inputIndex += idx * m_strides[i] ;
- }
- if (m_reverse[0]) {
- inputIndex += (m_dimensions[0] - index - 1);
- } else {
- inputIndex += index;
- }
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {
- Index idx = index / m_strides[i];
- index -= idx * m_strides[i];
- if (m_reverse[i]) {
- idx = m_dimensions[i] - idx - 1;
- }
- inputIndex += idx * m_strides[i] ;
- }
- if (m_reverse[NumDims-1]) {
- inputIndex += (m_dimensions[NumDims-1] - index - 1);
- } else {
- inputIndex += index;
- }
- }
- return inputIndex;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
- Index index) const {
- return m_impl.coeff(reverseIndex(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- // TODO(ndjaitly): write a better packing routine that uses
- // local structure.
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
- values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
- 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>());
- for (int i = 0; i < NumDims; ++i) {
- if (m_reverse[i]) {
- compute_cost += 2 * TensorOpCost::AddCost<Index>();
- }
- }
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- Dimensions m_dimensions;
- array<Index, NumDims> m_strides;
- TensorEvaluator<ArgType, Device> m_impl;
- ReverseDimensions m_reverse;
-};
-
-// Eval as lvalue: writable evaluator for TensorReverseOp. It derives from the const
-// evaluator of the same expression and adds coeffRef() and writePacket() on top of it.
-template <typename ReverseDimensions, typename ArgType, typename Device>
-struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
- : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
- Device> {
- typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
- Device> Base;
- typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<ReverseDimensions>::value;  // number of entries in ReverseDimensions
- typedef DSizes<Index, NumDims> Dimensions;
- // Evaluator flags; PacketAccess and Layout are forwarded from the argument's evaluator.
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
- : Base(op, device) {}  // this struct declares no data members of its own
-
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Dimensions& dimensions() const { return this->m_dimensions; }
- // Writable reference to the argument coefficient selected by Base::reverseIndex(index).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
- return this->m_impl.coeffRef(this->reverseIndex(index));
- }
- // Stores packet x at linear positions index .. index+PacketSize-1, one coefficient at a time via coeffRef().
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x) {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());  // the whole packet must fit inside the tensor
-
- // This code is pilfered from TensorMorphing.h
- EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];  // aligned scratch array that receives the packet lanes
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- for (int i = 0; i < PacketSize; ++i) {
- this->coeffRef(index+i) = values[i];
- }
- }
-
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
deleted file mode 100644
index 8501466..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ /dev/null
@@ -1,287 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
-#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
-
-namespace Eigen {
-
-namespace internal {
-
-template <typename Op, typename XprType>
-struct traits<TensorScanOp<Op, XprType> >  // traits of a scan expression; anything not redeclared below (e.g. Index) comes from traits<XprType>
- : public traits<XprType> {
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;  // same rank as the argument expression
- static const int Layout = XprTraits::Layout;  // same layout as the argument expression
-};
-
-template<typename Op, typename XprType>
-struct eval<TensorScanOp<Op, XprType>, Eigen::Dense>  // eval<> for the Dense case: `type` is a const reference to the scan expression itself
-{
- typedef const TensorScanOp<Op, XprType>& type;
-};
-
-template<typename Op, typename XprType>
-struct nested<TensorScanOp<Op, XprType>, 1,  // nested<> specialization: `type` is the scan expression by value (no reference)
- typename eval<TensorScanOp<Op, XprType> >::type>
-{
- typedef TensorScanOp<Op, XprType> type;
-};
-} // end namespace internal
-
-/** \class TensorScan
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor scan class: read-only expression node that stores an argument expression, a scan axis, an accumulator functor and an exclusive flag.
- */
-template <typename Op, typename XprType>
-class TensorScanOp
- : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> {
-public:
- typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorScanOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;
- // expr: argument expression; axis: dimension to scan along; exclusive defaults to false; op defaults to a default-constructed Op.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
- const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
- : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}
- // Read-only accessors; axis() and accumulator() return copies, expression() returns a reference.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Index axis() const { return m_axis; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const XprType& expression() const { return m_expr; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Op accumulator() const { return m_accumulator; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- bool exclusive() const { return m_exclusive; }
-
-protected:
- typename XprType::Nested m_expr;  // argument, stored as XprType::Nested
- const Index m_axis;  // dimension along which the scan runs
- const Op m_accumulator;  // accumulator functor handed to the evaluator
- const bool m_exclusive;  // whether an exclusive scan was requested
-};
-
-template <typename Self, typename Reducer, typename Device>
-struct ScanLauncher;  // forward declaration: the evaluator below calls it; the generic definition and the GpuDevice specialization follow later in this file
-
-// Eval as rvalue: evalSubExprsIfNeeded() materialises the whole scan into a buffer; coeff()/packet() then read from that buffer.
-template <typename Op, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
-
- typedef TensorScanOp<Op, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;  // rank of the argument
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
-
- enum {
- IsAligned = false,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = true  // data() exposes the result buffer m_output
- };
- // Captures the scan parameters from op and precomputes the extent and the linear stride of the scan axis.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
- : m_impl(op.expression(), device),
- m_device(device),
- m_exclusive(op.exclusive()),
- m_accumulator(op.accumulator()),
- m_size(m_impl.dimensions()[op.axis()]),  // extent of the scan axis
- m_stride(1),  // finalised in the constructor body below
- m_output(NULL) {  // no result buffer until evalSubExprsIfNeeded() allocates one
-
- // Accumulating a scalar isn't supported.
- EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
- eigen_assert(op.axis() >= 0 && op.axis() < NumDims);  // NOTE(review): m_size above already indexed dimensions() with op.axis() before this check runs
-
- // Compute stride of scan axis
- const Dimensions& dims = m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < op.axis(); ++i) {  // ColMajor: product of the dimensions before the scan axis
- m_stride = m_stride * dims[i];
- }
- } else {
- for (int i = NumDims - 1; i > op.axis(); --i) {  // otherwise: product of the dimensions after the scan axis
- m_stride = m_stride * dims[i];
- }
- }
- }
- // The output has the same dimensions as the argument.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
- return m_impl.dimensions();
- }
- // Accessors below expose the precomputed scan parameters; the ScanLauncher implementations use them.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const {
- return m_stride;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
- return m_size;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const {
- return m_accumulator;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const {
- return m_exclusive;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const {
- return m_impl;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const {
- return m_device;
- }
- // Runs the scan. With a non-null `data` the result is written straight into it and false is returned; otherwise a buffer is allocated through the device, filled, and true is returned.
- EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
- m_impl.evalSubExprsIfNeeded(NULL);
- ScanLauncher<Self, Op, Device> launcher;
- if (data) {
- launcher(*this, data);
- return false;  // m_output is not assigned on this path
- }
-
- const Index total_size = internal::array_prod(dimensions());
- m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar)));  // released in cleanup()
- launcher(*this, m_output);
- return true;
- }
- // Packet read from the result buffer m_output; presumably only called after evalSubExprsIfNeeded() returned true — TODO confirm against callers.
- template<int LoadMode>
- EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
- return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const
- {
- return m_output;  // NULL until the internal buffer has been allocated
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_output[index];  // plain read from the result buffer
- }
- // Cost model: only the first TensorOpCost argument is non-zero (sizeof(CoeffReturnType)); the other two are 0.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
- return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
- }
- // Releases the buffer allocated by evalSubExprsIfNeeded(), if any, then cleans up the argument evaluator.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- if (m_output != NULL) {
- m_device.deallocate(m_output);
- m_output = NULL;
- }
- m_impl.cleanup();
- }
-
-protected:
- TensorEvaluator<ArgType, Device> m_impl;  // evaluator of the argument expression
- const Device& m_device;  // stored by reference; used for allocate()/deallocate()
- const bool m_exclusive;  // copy of op.exclusive()
- Op m_accumulator;  // copy of op.accumulator()
- const Index m_size;  // extent of the scan axis
- Index m_stride;  // linear distance between consecutive elements along the scan axis
- CoeffReturnType* m_output;  // internally allocated result buffer, or NULL
-};
-
-// CPU implementation of scan
-// TODO(ibab) This single-threaded implementation should be parallelized,
-// at least by running multiple scans at the same time.
-template <typename Self, typename Reducer, typename Device>
-struct ScanLauncher {
- void operator()(Self& self, typename Self::CoeffReturnType *data) {  // writes the scan of self.inner() into data; Reducer and Device are not referenced in the body
- Index total_size = internal::array_prod(self.dimensions());  // NOTE(review): `Index` is not declared in this struct, so it is whatever `Index` is visible from the enclosing scope, not Self::Index
-
- // We fix the index along the scan axis to 0 and perform a
- // scan per remaining entry. The iteration is split into two nested
- // loops to avoid an integer division by keeping track of each idx1 and idx2.
- for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) {  // idx1 advances by stride * size per step
- for (Index idx2 = 0; idx2 < self.stride(); idx2++) {  // idx2 ranges over [0, stride)
- // Calculate the starting offset for the scan
- Index offset = idx1 + idx2;
-
- // Compute the scan along the axis, starting at the calculated offset
- typename Self::CoeffReturnType accum = self.accumulator().initialize();
- for (Index idx3 = 0; idx3 < self.size(); idx3++) {
- Index curr = offset + idx3 * self.stride();  // consecutive elements of one scan line are stride() apart
-
- if (self.exclusive()) {
- data[curr] = self.accumulator().finalize(accum);  // exclusive: emit the running value before folding in the current coefficient
- self.accumulator().reduce(self.inner().coeff(curr), &accum);
- } else {
- self.accumulator().reduce(self.inner().coeff(curr), &accum);  // inclusive: fold in the current coefficient first
- data[curr] = self.accumulator().finalize(accum);
- }
- }
- }
- }
- }
-};
-
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
-
-// GPU implementation of scan
-// TODO(ibab) This placeholder implementation performs multiple scans in
-// parallel, but it would be better to use a parallel scan algorithm and
-// optimize memory access.
-template <typename Self, typename Reducer>
-__global__ void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) {
- // Compute offset as in the CPU version
- Index val = threadIdx.x + blockIdx.x * blockDim.x;  // each thread derives its own value of val, and from it one scan line
- Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();  // first element of scan line number `val`
-
- if (offset + (self.size() - 1) * self.stride() < total_size) {  // skip threads whose scan line would end past the tensor
- // Compute the scan along the axis, starting at the calculated offset
- typename Self::CoeffReturnType accum = self.accumulator().initialize();
- for (Index idx = 0; idx < self.size(); idx++) {
- Index curr = offset + idx * self.stride();
- if (self.exclusive()) {
- data[curr] = self.accumulator().finalize(accum);  // exclusive: emit before folding in the current coefficient
- self.accumulator().reduce(self.inner().coeff(curr), &accum);
- } else {
- self.accumulator().reduce(self.inner().coeff(curr), &accum);  // inclusive: fold in first, then emit
- data[curr] = self.accumulator().finalize(accum);
- }
- }
- }
- __syncthreads();  // NOTE(review): nothing follows this barrier inside the kernel
-
-}
-
-// GPU launcher for the scan: starts ScanKernel with one thread per scan line,
-// grouped into blocks of 64 threads, on the stream of self.device().
-// `data` receives the result and must hold array_prod(self.dimensions()) elements.
-template <typename Self, typename Reducer>
-struct ScanLauncher<Self, Reducer, GpuDevice> {
-  void operator()(const Self& self, typename Self::CoeffReturnType* data) {
-    Index total_size = internal::array_prod(self.dimensions());
-    // An empty tensor has nothing to scan. Returning here keeps the division
-    // below away from self.size() == 0 (scan axis of extent 0) and avoids
-    // requesting a kernel launch with zero blocks.
-    if (total_size == 0) return;
-    // total_size / size() is the number of scan lines; round up to whole blocks.
-    Index num_blocks = (total_size / self.size() + 63) / 64;
-    Index block_size = 64;
-    LAUNCH_CUDA_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
-  }
-};
-#endif // EIGEN_USE_GPU && __CUDACC__
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
deleted file mode 100644
index 113c060..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
-
-namespace Eigen {
-
-/** \class TensorShuffling
- * \ingroup CXX11_Tensor_Module
- *
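- * \brief Tensor shuffling class.
- * Expression that permutes the dimensions of its argument: output
- * dimension i takes the size of input dimension shuffle[i].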
- */
-namespace internal {
-template<typename Shuffle, typename XprType>
-struct traits<TensorShufflingOp<Shuffle, XprType> > : public traits<XprType>  // traits of a shuffling expression, taken from the argument's traits
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;  // same rank as the argument expression
- static const int Layout = XprTraits::Layout;  // same layout as the argument expression
-};
-
-template<typename Shuffle, typename XprType>
-struct eval<TensorShufflingOp<Shuffle, XprType>, Eigen::Dense>  // eval<> for the Dense case: `type` is a const reference to the expression itself
-{
- typedef const TensorShufflingOp<Shuffle, XprType>& type;
-};
-
-template<typename Shuffle, typename XprType>
-struct nested<TensorShufflingOp<Shuffle, XprType>, 1, typename eval<TensorShufflingOp<Shuffle, XprType> >::type>  // nested<> specialization: `type` is the expression by value
-{
- typedef TensorShufflingOp<Shuffle, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename Shuffle, typename XprType>
-class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> >  // expression node: an argument expression plus a Shuffle permutation; assignable (see operator= below)
-{
- public:
- typedef typename Eigen::internal::traits<TensorShufflingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorShufflingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
- // Stores the argument expression and a copy of the permutation.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
- : m_xpr(expr), m_shuffle(shuffle) {}
- // The stored permutation.
- EIGEN_DEVICE_FUNC
- const Shuffle& shufflePermutation() const { return m_shuffle; }
- // The argument expression, with XprType::Nested passed through internal::remove_all.
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
- // Assignment from another shuffling expression: wraps both sides in a TensorAssignOp and runs it on a DefaultDevice.
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const TensorShufflingOp& other)
- {
- typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
- // Assignment from any other expression type; same TensorAssignOp/TensorExecutor path as above.
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorShufflingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;  // argument, stored as XprType::Nested
- const Shuffle m_shuffle;  // permutation, stored by value
-};
-
-
-// Eval as rvalue: maps every output linear index to a linear index of the argument (srcCoeff) and reads the coefficient from there.
-template<typename Shuffle, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
-{
- typedef TensorShufflingOp<Shuffle, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;  // rank of the argument
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),  // NOTE(review): derived from packet_traits<Scalar>, whereas PacketSize above comes from the per-Device PacketReturnType
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
- // Precomputes the shuffled dimensions, the strides of the output, and the argument strides reordered by the permutation.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- const Shuffle& shuffle = op.shufflePermutation();
- for (int i = 0; i < NumDims; ++i) {
- m_dimensions[i] = input_dims[shuffle[i]];  // output dimension i has the size of input dimension shuffle[i]
- }
-
- array<Index, NumDims> inputStrides;  // strides of the argument in its own dimension order
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- inputStrides[0] = 1;
- m_outputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {  // ColMajor: strides grow from the first dimension
- inputStrides[i] = inputStrides[i - 1] * input_dims[i - 1];
- m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
- }
- } else {
- inputStrides[NumDims - 1] = 1;
- m_outputStrides[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {  // otherwise: strides grow from the last dimension
- inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1];
- m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
- }
- }
-
- for (int i = 0; i < NumDims; ++i) {
- m_inputStrides[i] = inputStrides[shuffle[i]];  // argument stride that corresponds to output dimension i
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- // Only forwards to the argument evaluator; the destination pointer is ignored and true is always returned.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
- // Coefficient at output linear index `index`, read from the argument at srcCoeff(index).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(srcCoeff(index));
- }
- // Packet for output indices index .. index+PacketSize-1, assembled one coefficient at a time through coeff().
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());  // the whole packet must lie inside the tensor
-
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];  // aligned scratch array for the gathered coefficients
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- // Cost model: the argument's cost plus, per dimension, 2 adds, 2 multiplies and 1 divide on Index (the index arithmetic of srcCoeff).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
- 2 * TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>());
- return m_impl.costPerCoeff(vectorized) +
- TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }  // no raw buffer is exposed (RawAccess is false)
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {  // output linear index -> argument linear index
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {  // peel off coordinates starting with the largest output stride
- const Index idx = index / m_outputStrides[i];
- inputIndex += idx * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- return inputIndex + index * m_inputStrides[0];  // the remainder is the coordinate along dimension 0
- } else {
- for (int i = 0; i < NumDims - 1; ++i) {  // same decomposition, starting from dimension 0
- const Index idx = index / m_outputStrides[i];
- inputIndex += idx * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- return inputIndex + index * m_inputStrides[NumDims - 1];  // the remainder is the coordinate along the last dimension
- }
- }
-
- Dimensions m_dimensions;  // dimensions of the shuffled output
- array<Index, NumDims> m_outputStrides;  // strides of the output in its own dimension order
- array<Index, NumDims> m_inputStrides;  // argument strides, reordered so entry i matches output dimension i
- TensorEvaluator<ArgType, Device> m_impl;  // evaluator of the argument expression
-};
-
-
-// Eval as lvalue: writable evaluator; derives from the const evaluator and adds coeffRef() and writePacket().
-template<typename Shuffle, typename ArgType, typename Device>
-struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
- : public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
-{
- typedef TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device> Base;
-
- typedef TensorShufflingOp<Shuffle, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {  // only these three flags are redeclared in this struct
- IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device)
- { }
- // Writable reference to the argument coefficient that output index `index` maps to (via Base::srcCoeff).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(this->srcCoeff(index));
- }
- // Stores packet x at output indices index .. index+PacketSize-1 through coeffRef(). NOTE(review): unlike coeffRef() this is not marked EIGEN_DEVICE_FUNC, and it has no bounds eigen_assert.
- template <int StoreMode> EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
-
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];  // aligned scratch array that receives the packet lanes
- internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
- for (int i = 0; i < PacketSize; ++i) {
- this->coeffRef(index+i) = values[i];
- }
- }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
deleted file mode 100644
index e6a666f..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ /dev/null
@@ -1,146 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
-#define EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
-
-#ifdef EIGEN_TENSOR_STORAGE_CTOR_PLUGIN  // optional hook: when defined, it is expanded in the body of the sized TensorStorage constructor below
- #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN EIGEN_TENSOR_STORAGE_CTOR_PLUGIN;
-#else
- #define EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN  // expands to nothing when no hook is defined
-#endif
-
-namespace Eigen {
-
-/** \internal
- *
- * \class TensorStorage
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Stores the data of a tensor
- *
- * This class stores the data of fixed-size, dynamic-size or mixed tensors
- * in a way as compact as possible.
- *
- * \sa Tensor
- */
-template<typename T, typename Dimensions, int Options> class TensorStorage;
-
-
-// Pure fixed-size storage: the elements live in an array member of the object, sized from FixedDimensions::total_size.
-template<typename T, typename FixedDimensions, int Options_>
-class TensorStorage
-{
- private:
- static const std::size_t Size = FixedDimensions::total_size;  // number of elements, known at compile time
-
- // Allocate an array of size at least one to prevent compiler warnings.
- static const std::size_t MinSize = max_n_1<Size>::size;
- EIGEN_ALIGN_MAX T m_data[MinSize];
-
- FixedDimensions m_dimensions;
-
- public:
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorStorage() {  // empty body: no member is explicitly initialised here
- }
- // Pointer to the first element of the in-object array.
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T *data() { return m_data; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T *data() const { return m_data; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const FixedDimensions& dimensions() const { return m_dimensions; }
- // Element count as reported by m_dimensions.TotalSize().
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE DenseIndex size() const { return m_dimensions.TotalSize(); }
-};
-
-
-// pure dynamic: the buffer is allocated at run time and owned by this object; its element count is tracked as the product of m_dimensions.
-template<typename T, typename IndexType, int NumIndices_, int Options_>
-class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
-{
- public:
- typedef IndexType Index;
- typedef DSizes<IndexType, NumIndices_> Dimensions;
- typedef TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_> Self;
- // Default: no buffer, except for rank 0, where a single element is allocated.
- EIGEN_DEVICE_FUNC TensorStorage() : m_data(0), m_dimensions() {
- if (NumIndices_ == 0) {
- m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1);
- }
- }
- EIGEN_DEVICE_FUNC TensorStorage(internal::constructor_without_unaligned_array_assert)  // tag constructor: null buffer, every dimension set to 0
- : m_data(0), m_dimensions(internal::template repeat<NumIndices_, Index>(0)) {}
- EIGEN_DEVICE_FUNC TensorStorage(Index size, const array<Index, NumIndices_>& dimensions)  // allocates `size` elements; NOTE(review): size is not checked against the product of `dimensions`, which the destructor uses as the element count
- : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size)), m_dimensions(dimensions)
- { EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- template <typename... DenseIndex>
- EIGEN_DEVICE_FUNC TensorStorage(DenseIndex... indices) : m_dimensions(indices...) {  // dimensions from the argument list; the buffer holds their product
- m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(m_dimensions));
- }
-#endif
- // Deep copy: allocates a buffer of the same element count and copies the elements across.
- EIGEN_DEVICE_FUNC TensorStorage(const Self& other)
- : m_data(internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(internal::array_prod(other.m_dimensions)))
- , m_dimensions(other.m_dimensions)
- {
- internal::smart_copy(other.m_data, other.m_data+internal::array_prod(other.m_dimensions), m_data);
- }
- EIGEN_DEVICE_FUNC Self& operator=(const Self& other)  // copies `other` into a temporary and swaps with it; self-assignment is a no-op
- {
- if (this != &other) {
- Self tmp(other);
- this->swap(tmp);
- }
- return *this;
- }
-
- EIGEN_DEVICE_FUNC ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }  // releases the buffer, passing the product of the dimensions as element count
- EIGEN_DEVICE_FUNC void swap(Self& other)  // exchanges buffer pointer and dimensions with `other`
- { numext::swap(m_data,other.m_data); numext::swap(m_dimensions,other.m_dimensions); }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {return m_dimensions;}
- // Sets the dimensions to nbDimensions. The buffer is reallocated only when `size` differs from the current element count, and old contents are not copied over.
- EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions)
- {
- const Index currentSz = internal::array_prod(m_dimensions);
- if(size != currentSz)
- {
- internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, currentSz);
- if (size)
- m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(size);
- else if (NumIndices_ == 0) {  // rank 0 always keeps a single element, as in the default constructor
- m_data = internal::conditional_aligned_new_auto<T,(Options_&DontAlign)==0>(1);
- }
- else
- m_data = 0;
- EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})  // NOTE(review): this is the DENSE-storage plugin macro, not the EIGEN_INTERNAL_TENSOR_STORAGE_CTOR_PLUGIN defined at the top of this file; it is not defined in this file — confirm it is intended
- }
- m_dimensions = nbDimensions;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T *data() { return m_data; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T *data() const { return m_data; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); }
-
- private:
- T *m_data;  // owned buffer; may be null
- Dimensions m_dimensions;  // current dimensions
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
deleted file mode 100644
index 6c35bfd..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h
+++ /dev/null
@@ -1,338 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
-#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
-
-namespace Eigen {
-
-/** \class TensorStriding
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor striding class.
- *
- *
- */
-namespace internal {
-template<typename Strides, typename XprType>
-struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
-{
- typedef typename XprType::Scalar Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions;
- static const int Layout = XprTraits::Layout;
-};
-
-template<typename Strides, typename XprType>
-struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
-{
- typedef const TensorStridingOp<Strides, XprType>& type;
-};
-
-template<typename Strides, typename XprType>
-struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
-{
- typedef TensorStridingOp<Strides, XprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename Strides, typename XprType>
-class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
-{
- public:
- typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
- : m_xpr(expr), m_dims(dims) {}
-
- EIGEN_DEVICE_FUNC
- const Strides& strides() const { return m_dims; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other)
- {
- typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- protected:
- typename XprType::Nested m_xpr;
- const Strides m_dims;
-};
-
-
-// Eval as rvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
-{
- typedef TensorStridingOp<Strides, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- m_dimensions = m_impl.dimensions();
- for (int i = 0; i < NumDims; ++i) {
- m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
- }
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_outputStrides[0] = 1;
- m_inputStrides[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- m_inputStrides[i-1] *= op.strides()[i-1];
- }
- m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
- } else { // RowMajor
- m_outputStrides[NumDims-1] = 1;
- m_inputStrides[NumDims-1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
- m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
- m_inputStrides[i+1] *= op.strides()[i+1];
- }
- m_inputStrides[0] *= op.strides()[0];
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- return m_impl.coeff(srcCoeff(index));
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- Index inputIndices[] = {0, 0};
- Index indices[] = {index, index + PacketSize - 1};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_outputStrides[i];
- const Index idx1 = indices[1] / m_outputStrides[i];
- inputIndices[0] += idx0 * m_inputStrides[i];
- inputIndices[1] += idx1 * m_inputStrides[i];
- indices[0] -= idx0 * m_outputStrides[i];
- indices[1] -= idx1 * m_outputStrides[i];
- }
- inputIndices[0] += indices[0] * m_inputStrides[0];
- inputIndices[1] += indices[1] * m_inputStrides[0];
- } else { // RowMajor
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx0 = indices[0] / m_outputStrides[i];
- const Index idx1 = indices[1] / m_outputStrides[i];
- inputIndices[0] += idx0 * m_inputStrides[i];
- inputIndices[1] += idx1 * m_inputStrides[i];
- indices[0] -= idx0 * m_outputStrides[i];
- indices[1] -= idx1 * m_outputStrides[i];
- }
- inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
- inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
- }
- if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
- PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
- return rslt;
- }
- else {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- values[0] = m_impl.coeff(inputIndices[0]);
- values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
- for (int i = 1; i < PacketSize-1; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
- double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
- TensorOpCost::MulCost<Index>() +
- TensorOpCost::DivCost<Index>()) +
- TensorOpCost::MulCost<Index>();
- if (vectorized) {
- compute_cost *= 2; // packet() computes two indices
- }
- const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
- return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
- // Computation is not vectorized per se, but it is done once per packet.
- TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
- {
- Index inputIndex = 0;
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
- inputIndex += idx * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- inputIndex += index * m_inputStrides[0];
- } else { // RowMajor
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_outputStrides[i];
- inputIndex += idx * m_inputStrides[i];
- index -= idx * m_outputStrides[i];
- }
- inputIndex += index * m_inputStrides[NumDims-1];
- }
- return inputIndex;
- }
-
- Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
- array<Index, NumDims> m_inputStrides;
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-// Eval as lvalue
-template<typename Strides, typename ArgType, typename Device>
-struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
- : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
-{
- typedef TensorStridingOp<Strides, ArgType> XprType;
- typedef TensorEvaluator<const XprType, Device> Base;
- // typedef typename XprType::Index Index;
- static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- // typedef DSizes<Index, NumDims> Dimensions;
-
- enum {
- IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : Base(op, device) { }
-
- typedef typename XprType::Index Index;
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
- {
- return this->m_impl.coeffRef(this->srcCoeff(index));
- }
-
- template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void writePacket(Index index, const PacketReturnType& x)
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());
-
- Index inputIndices[] = {0, 0};
- Index indices[] = {index, index + PacketSize - 1};
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / this->m_outputStrides[i];
- const Index idx1 = indices[1] / this->m_outputStrides[i];
- inputIndices[0] += idx0 * this->m_inputStrides[i];
- inputIndices[1] += idx1 * this->m_inputStrides[i];
- indices[0] -= idx0 * this->m_outputStrides[i];
- indices[1] -= idx1 * this->m_outputStrides[i];
- }
- inputIndices[0] += indices[0] * this->m_inputStrides[0];
- inputIndices[1] += indices[1] * this->m_inputStrides[0];
- } else { // RowMajor
- for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx0 = indices[0] / this->m_outputStrides[i];
- const Index idx1 = indices[1] / this->m_outputStrides[i];
- inputIndices[0] += idx0 * this->m_inputStrides[i];
- inputIndices[1] += idx1 * this->m_inputStrides[i];
- indices[0] -= idx0 * this->m_outputStrides[i];
- indices[1] -= idx1 * this->m_outputStrides[i];
- }
- inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
- inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
- }
- if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
- this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
- }
- else {
- EIGEN_ALIGN_MAX Scalar values[PacketSize];
- internal::pstore<Scalar, PacketReturnType>(values, x);
- this->m_impl.coeffRef(inputIndices[0]) = values[0];
- this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
- for (int i = 1; i < PacketSize-1; ++i) {
- this->coeffRef(index+i) = values[i];
- }
- }
- }
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
deleted file mode 100644
index bb8800d..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: eigen@codeplay.com
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-// General include header of SYCL target for Tensor Module
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
-
-#ifdef EIGEN_USE_SYCL
-
-// global pointer to set different attribute state for a class
-template <class T>
-struct MakeGlobalPointer {
- typedef typename cl::sycl::global_ptr<T>::pointer_t Type;
-};
-
-// global pointer to set different attribute state for a class
-template <class T>
-struct MakeLocalPointer {
- typedef typename cl::sycl::local_ptr<T>::pointer_t Type;
-};
-
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-
-/// This struct is used for special expression nodes with no operations (for example assign and selectOP).
- struct NoOP;
-
-template<bool IsConst, typename T> struct GetType{
- typedef const T Type;
-};
-template<typename T> struct GetType<false, T>{
- typedef T Type;
-};
-
-}
-}
-}
-
-// tuple construction
-#include "TensorSyclTuple.h"
-
-// counting number of leaf at compile time
-#include "TensorSyclLeafCount.h"
-
-// The index PlaceHolder takes the actual expression and replaces the actual
-// data on it with the place holder. It uses the same pre-order expression tree
-// traverse as the leaf count in order to give the right access number to each
-// node in the expression
-#include "TensorSyclPlaceHolderExpr.h"
-
-// creation of an accessor tuple from a tuple of SYCL buffers
-#include "TensorSyclExtractAccessor.h"
-
-// this is used to change the address space type in tensor map for GPU
-#include "TensorSyclConvertToDeviceExpression.h"
-
-// this is used to extract the functors
-#include "TensorSyclExtractFunctors.h"
-
-// this is used to create tensormap on the device
-// this is used to construct the expression on the device
-#include "TensorSyclExprConstructor.h"
-
-/// this is used for extracting tensor reduction
-#include "TensorReductionSycl.h"
-
-// kernel execution using fusion
-#include "TensorSyclRun.h"
-
-#endif // end of EIGEN_USE_SYCL
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
deleted file mode 100644
index 8729c86..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
+++ /dev/null
@@ -1,121 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclConvertToDeviceExpression.h
- *
- * \brief:
- * Conversion from host pointer to device pointer
- * inside leaf nodes of the expression.
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-
-/// \struct ConvertToDeviceExpression
-/// \brief This struct is used to convert the MakePointer in the host expression
-/// to the MakeGlobalPointer for the device expression. For the leafNodes
-/// containing the pointer. This is due to the fact that the address space of
-/// the pointer T* is different on the host and the device.
-template <typename Expr>
-struct ConvertToDeviceExpression;
-
-template<template<class...> class NonOpCategory, bool IsConst, typename... Args>
-struct NonOpConversion{
- typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type...> >::Type Type;
-};
-
-
-template<template<class, template <class> class > class NonOpCategory, bool IsConst, typename Args>
-struct DeviceConvertor{
- typedef typename GetType<IsConst, NonOpCategory<typename ConvertToDeviceExpression<Args>::Type, MakeGlobalPointer> >::Type Type;
-};
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is TensorMap
-#define TENSORMAPCONVERT(CVQual)\
-template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_>\
-struct ConvertToDeviceExpression<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_> > {\
- typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
-};
-
-TENSORMAPCONVERT(const)
-TENSORMAPCONVERT()
-#undef TENSORMAPCONVERT
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp
-#define CATEGORYCONVERT(CVQual)\
-template <template<class, class...> class Category, typename OP, typename... subExprs>\
-struct ConvertToDeviceExpression<CVQual Category<OP, subExprs...> > {\
- typedef CVQual Category<OP, typename ConvertToDeviceExpression<subExprs>::Type... > Type;\
-};
-CATEGORYCONVERT(const)
-CATEGORYCONVERT()
-#undef CATEGORYCONVERT
-
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is TensorCwiseSelectOp
-#define SELECTOPCONVERT(CVQual, Res)\
-template <typename IfExpr, typename ThenExpr, typename ElseExpr>\
-struct ConvertToDeviceExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >\
-: NonOpConversion<TensorSelectOp, Res, IfExpr, ThenExpr, ElseExpr> {};
-SELECTOPCONVERT(const, true)
-SELECTOPCONVERT(, false)
-#undef SELECTOPCONVERT
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is const AssingOP
-#define ASSIGNCONVERT(CVQual, Res)\
-template <typename LHSExpr, typename RHSExpr>\
-struct ConvertToDeviceExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr> >\
-: NonOpConversion<TensorAssignOp, Res, LHSExpr, RHSExpr>{};
-
-ASSIGNCONVERT(const, true)
-ASSIGNCONVERT(, false)
-#undef ASSIGNCONVERT
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node
-/// type is either TensorForcedEvalOp or TensorEvalToOp
-#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\
-template <typename Expr>\
-struct ConvertToDeviceExpression<CVQual ExprNode<Expr> > \
-: DeviceConvertor<ExprNode, Res, Expr>{};
-
-KERNELBROKERCONVERT(const, true, TensorForcedEvalOp)
-KERNELBROKERCONVERT(, false, TensorForcedEvalOp)
-KERNELBROKERCONVERT(const, true, TensorEvalToOp)
-KERNELBROKERCONVERT(, false, TensorEvalToOp)
-#undef KERNELBROKERCONVERT
-
-/// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorReductionOp
-#define KERNELBROKERCONVERTREDUCTION(CVQual)\
-template <typename OP, typename Dim, typename subExpr, template <class> class MakePointer_>\
-struct ConvertToDeviceExpression<CVQual TensorReductionOp<OP, Dim, subExpr, MakePointer_> > {\
- typedef CVQual TensorReductionOp<OP, Dim, typename ConvertToDeviceExpression<subExpr>::Type, MakeGlobalPointer> Type;\
-};
-
-KERNELBROKERCONVERTREDUCTION(const)
-KERNELBROKERCONVERTREDUCTION()
-#undef KERNELBROKERCONVERTREDUCTION
-
-} // namespace internal
-} // namespace TensorSycl
-} // namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX1
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
deleted file mode 100644
index 983f631..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclExprConstructor.h
- *
- * \brief:
- * This file re-create an expression on the SYCL device in order
- * to use the original tensor evaluator.
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-/// this class is used by EvalToOp in order to create an lhs expression which is
-/// a pointer from an accessor on device-only buffer
-template <typename PtrType, size_t N, typename... Params>
-struct EvalToLHSConstructor {
- PtrType expr;
- EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {}
-};
-
-/// struct ExprConstructor is used to reconstruct the expression on the device and
-/// recreate the expression with MakeGlobalPointer containing the device address
-/// space for the TensorMap pointers used in eval function.
-/// It receives the original expression type, the functor of the node, the tuple
-/// of accessors, and the device expression type to re-instantiate the
-/// expression tree for the device
-template <typename OrigExpr, typename IndexExpr, typename... Params>
-struct ExprConstructor;
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorMap
-#define TENSORMAP(CVQual)\
-template <typename Scalar_, int Options_, int Options2_, int Options3_, int NumIndices_, typename IndexType_,\
-template <class> class MakePointer_, size_t N, typename... Params>\
-struct ExprConstructor< CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer>,\
-CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options3_, MakePointer_>, N>, Params...>{\
- typedef CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakeGlobalPointer> Type;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
- : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
-};
-
-TENSORMAP(const)
-TENSORMAP()
-#undef TENSORMAP
-
-#define UNARYCATEGORY(CVQual)\
-template <template<class, class> class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\
-struct ExprConstructor<CVQual UnaryCategory<OP, OrigRHSExpr>, CVQual UnaryCategory<OP, RHSExpr>, Params...> {\
- typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_type;\
- my_type rhsExpr;\
- typedef CVQual UnaryCategory<OP, typename my_type::Type> Type;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
- : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\
-};
-
-UNARYCATEGORY(const)
-UNARYCATEGORY()
-#undef UNARYCATEGORY
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorBinaryOp
-#define BINARYCATEGORY(CVQual)\
-template <template<class, class, class> class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\
-typename RHSExpr, typename... Params>\
-struct ExprConstructor<CVQual BinaryCategory<OP, OrigLHSExpr, OrigRHSExpr>, CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Params...> {\
- typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
- typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
- typedef CVQual BinaryCategory<OP, typename my_left_type::Type, typename my_right_type::Type> Type;\
- my_left_type lhsExpr;\
- my_right_type rhsExpr;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
- : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\
-};
-
-BINARYCATEGORY(const)
-BINARYCATEGORY()
-#undef BINARYCATEGORY
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorCwiseTernaryOp
-#define TERNARYCATEGORY(CVQual)\
-template <template <class, class, class, class> class TernaryCategory, typename OP, typename OrigArg1Expr, typename OrigArg2Expr,typename OrigArg3Expr,\
-typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename... Params>\
-struct ExprConstructor<CVQual TernaryCategory<OP, OrigArg1Expr, OrigArg2Expr, OrigArg3Expr>, CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Params...> {\
- typedef ExprConstructor<OrigArg1Expr, Arg1Expr, Params...> my_arg1_type;\
- typedef ExprConstructor<OrigArg2Expr, Arg2Expr, Params...> my_arg2_type;\
- typedef ExprConstructor<OrigArg3Expr, Arg3Expr, Params...> my_arg3_type;\
- typedef CVQual TernaryCategory<OP, typename my_arg1_type::Type, typename my_arg2_type::Type, typename my_arg3_type::Type> Type;\
- my_arg1_type arg1Expr;\
- my_arg2_type arg2Expr;\
- my_arg3_type arg3Expr;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD,const utility::tuple::Tuple<Params...> &t)\
- : arg1Expr(funcD.arg1Expr, t), arg2Expr(funcD.arg2Expr, t), arg3Expr(funcD.arg3Expr, t), expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {}\
-};
-
-TERNARYCATEGORY(const)
-TERNARYCATEGORY()
-#undef TERNARYCATEGORY
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorCwiseSelectOp
-#define SELECTOP(CVQual)\
-template <typename OrigIfExpr, typename OrigThenExpr, typename OrigElseExpr, typename IfExpr, typename ThenExpr, typename ElseExpr, typename... Params>\
-struct ExprConstructor< CVQual TensorSelectOp<OrigIfExpr, OrigThenExpr, OrigElseExpr>, CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Params...> {\
- typedef ExprConstructor<OrigIfExpr, IfExpr, Params...> my_if_type;\
- typedef ExprConstructor<OrigThenExpr, ThenExpr, Params...> my_then_type;\
- typedef ExprConstructor<OrigElseExpr, ElseExpr, Params...> my_else_type;\
- typedef CVQual TensorSelectOp<typename my_if_type::Type, typename my_then_type::Type, typename my_else_type::Type> Type;\
- my_if_type ifExpr;\
- my_then_type thenExpr;\
- my_else_type elseExpr;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
- : ifExpr(funcD.ifExpr, t), thenExpr(funcD.thenExpr, t), elseExpr(funcD.elseExpr, t), expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {}\
-};
-
-SELECTOP(const)
-SELECTOP()
-#undef SELECTOP
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// const TensorAssignOp
-#define ASSIGN(CVQual)\
-template <typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr, typename RHSExpr, typename... Params>\
-struct ExprConstructor<CVQual TensorAssignOp<OrigLHSExpr, OrigRHSExpr>, CVQual TensorAssignOp<LHSExpr, RHSExpr>, Params...> {\
- typedef ExprConstructor<OrigLHSExpr, LHSExpr, Params...> my_left_type;\
- typedef ExprConstructor<OrigRHSExpr, RHSExpr, Params...> my_right_type;\
- typedef CVQual TensorAssignOp<typename my_left_type::Type, typename my_right_type::Type> Type;\
- my_left_type lhsExpr;\
- my_right_type rhsExpr;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
- : lhsExpr(funcD.lhsExpr, t), rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr) {}\
- };
-
- ASSIGN(const)
- ASSIGN()
- #undef ASSIGN
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorEvalToOp
-#define EVALTO(CVQual)\
-template <typename OrigExpr, typename Expr, typename... Params>\
-struct ExprConstructor<CVQual TensorEvalToOp<OrigExpr, MakeGlobalPointer>, CVQual TensorEvalToOp<Expr>, Params...> {\
- typedef ExprConstructor<OrigExpr, Expr, Params...> my_expr_type;\
- typedef typename TensorEvalToOp<OrigExpr, MakeGlobalPointer>::PointerType my_buffer_type;\
- typedef CVQual TensorEvalToOp<typename my_expr_type::Type, MakeGlobalPointer> Type;\
- my_expr_type nestedExpression;\
- EvalToLHSConstructor<my_buffer_type, 0, Params...> buffer;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
- : nestedExpression(funcD.rhsExpr, t), buffer(t), expr(buffer.expr, nestedExpression.expr) {}\
-};
-
-EVALTO(const)
-EVALTO()
-#undef EVALTO
-
-/// specialisation of the \ref ExprConstructor struct when the node type is
-/// TensorForcedEvalOp
-#define FORCEDEVAL(CVQual)\
-template <typename OrigExpr, typename DevExpr, size_t N, typename... Params>\
-struct ExprConstructor<CVQual TensorForcedEvalOp<OrigExpr, MakeGlobalPointer>,\
-CVQual PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\
- typedef CVQual TensorMap<Tensor<typename TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::Scalar,\
- TensorForcedEvalOp<DevExpr, MakeGlobalPointer>::NumDimensions, 0, typename TensorForcedEvalOp<DevExpr>::Index>, 0, MakeGlobalPointer> Type;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
- : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
-};
-
-FORCEDEVAL(const)
-FORCEDEVAL()
-#undef FORCEDEVAL
-
-template <bool Conds, size_t X , size_t Y > struct ValueCondition {
- static const size_t Res =X;
-};
-template<size_t X, size_t Y> struct ValueCondition<false, X , Y> {
- static const size_t Res =Y;
-};
-
-/// specialisation of the \ref ExprConstructor struct when the node type is TensorReductionOp
-#define SYCLREDUCTIONEXPR(CVQual)\
-template <typename OP, typename Dim, typename OrigExpr, typename DevExpr, size_t N, typename... Params>\
-struct ExprConstructor<CVQual TensorReductionOp<OP, Dim, OrigExpr, MakeGlobalPointer>,\
-CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dim, DevExpr>, N>, Params...> {\
- static const size_t NumIndices= ValueCondition< TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions==0, 1, TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::NumDimensions >::Res;\
- typedef CVQual TensorMap<Tensor<typename TensorReductionOp<OP, Dim, DevExpr, MakeGlobalPointer>::Scalar,\
- NumIndices, 0, typename TensorReductionOp<OP, Dim, DevExpr>::Index>, 0, MakeGlobalPointer> Type;\
- Type expr;\
- template <typename FuncDetector>\
- ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
- : expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}\
-};
-
-SYCLREDUCTIONEXPR(const)
-SYCLREDUCTIONEXPR()
-#undef SYCLREDUCTIONEXPR
-
-/// template deduction for \ref ExprConstructor struct
-template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
-auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t)
- -> decltype(ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t)) {
- return ExprConstructor<OrigExpr, IndexExpr, Params...>(funcD, t);
-}
-
-} /// namespace TensorSycl
-} /// namespace internal
-} /// namespace Eigen
-
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
deleted file mode 100644
index cc18fcd..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclExtractAccessor.h
- *
- * \brief:
- * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl
- * buffers as an input. Using pre-order tree traversal, ExtractAccessor
- * recursively calls itself for its children in the expression tree. The
- * leaf node in the PlaceHolder expression is nothing but a container preserving
- * the order of the actual data in the tuple of sycl buffer. By invoking the
- * extract accessor for the PlaceHolder<N>, an accessor is created for the Nth
- * buffer in the tuple of buffers. This accessor is then added as an Nth
- * element in the tuple of accessors. In this case we preserve the order of data
- * in the expression tree.
- *
- * This is the specialisation of extract accessor method for different operation
- * type in the PlaceHolder expression.
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-/// struct ExtractAccessor: Extract Accessor Class is used to extract the
-/// accessor from a buffer.
-/// Depending on the type of the leaf node we can get a read accessor or a
-/// read_write accessor
-template <typename Evaluator>
-struct ExtractAccessor;
-
-struct AccessorConstructor{
- template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval)
- -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) {
- return ExtractAccessor<Arg>::getTuple(cgh, eval);
- }
-
- template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2)
- -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) {
- return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2));
- }
- template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3)
- -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) {
- return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)));
- }
- template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval)
- -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM,
- typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()))){
- return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM, typename Eigen::internal::remove_all<typename Arg::CoeffReturnType>::type>(eval.dimensions().TotalSize(), cgh,eval.data()));
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is
-/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp
-template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval)
- -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){
- return AccessorConstructor::getTuple(cgh, eval.impl());
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp
-template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp
-template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval)
- -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
- return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
- }
-};
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp
-template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is
-/// const TensorCwiseTernaryOp
-template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval)
- -> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){
- return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp
-template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is
-/// const TensorCwiseSelectOp. This is a special case where there is no OP
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval)
- -> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){
- return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is
-/// TensorCwiseSelectOp. This is a special case where there is no OP
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
- -> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
- return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap
-#define TENSORMAPEXPR(CVQual, ACCType)\
-template <typename PlainObjectType, int Options_, typename Dev>\
-struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\
- static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\
- -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\
- return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\
- }\
-};
-TENSORMAPEXPR(const, cl::sycl::access::mode::read)
-TENSORMAPEXPR(, cl::sycl::access::mode::read_write)
-#undef TENSORMAPEXPR
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval)
- -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
- return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval)
- -> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){
- return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp
-template <typename OP, typename Dim, typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> > {
- static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> eval)
- -> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
- return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
- }
-};
-
-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp
-template <typename OP, typename Dim, typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorReductionOp<OP, Dim, Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> >{};
-
-/// template deduction for \ref ExtractAccessor
-template <typename Evaluator>
-auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr)
--> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) {
- return ExtractAccessor<Evaluator>::getTuple(cgh, expr);
-}
-
-} /// namespace TensorSycl
-} /// namespace internal
-} /// namespace Eigen
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_ACCESSOR_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
deleted file mode 100644
index 9edd38e..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
+++ /dev/null
@@ -1,177 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclextractFunctors.h
- *
- * \brief:
- * Used to extract all the functors allocated to each node of the expression
-*tree.
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-/// struct FunctorExtractor: This struct is used to extract the functors
-/// constructed on
-/// the host-side, to pack them and reuse them in reconstruction of the
-/// expression on the device.
-/// We have to do that as in Eigen the functors are not stateless so we cannot
-/// re-instantiate them on the device.
-/// We have to pass instantiated functors to the device.
-// This struct is used for leafNode (TensorMap) and nodes behaving like leafNode (TensorForcedEval).
-template <typename Evaluator> struct FunctorExtractor{
- typedef typename Evaluator::Dimensions Dimensions;
- const Dimensions m_dimensions;
- const Dimensions& dimensions() const { return m_dimensions; }
- FunctorExtractor(const Evaluator& expr)
- : m_dimensions(expr.dimensions()) {}
-
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp
-template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
- FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
- OP func;
- FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr)
- : rhsExpr(expr.impl()), func(expr.functor()) {}
-};
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp
-template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
-: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorCwiseBinaryOp
-template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
- FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
- FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
- OP func;
- FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)
- : lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {}
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorCwiseBinaryOp
-template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
-: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorCwiseTernaryOp
-template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>
-struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
- FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;
- FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;
- FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;
- OP func;
- FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)
- : arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// TensorCwiseTernaryOp
-template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
-struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
-:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated.
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
- FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;
- FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;
- FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;
- FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)
- : ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
-:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorAssignOp. This is an specialisation without OP so it has to be separated.
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
- FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
- FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
- FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
- : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// TensorAssignOp. This is an specialisation without OP so it has to be separated.
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
-:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
-
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated.
-template <typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {
- FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
- FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr)
- : rhsExpr(expr.impl()) {}
-};
-
-/// specialisation of the \ref FunctorExtractor struct when the node type is
-/// TensorEvalToOp. This is a specialisation without OP so it has to be separated.
-template <typename RHSExpr, typename Dev>
-struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> >
-: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {};
-
-template<typename Dim, size_t NumOutputDim> struct DimConstr {
-template<typename InDim>
- static inline Dim getDim(InDim dims ) {return dims;}
-};
-
-template<typename Dim> struct DimConstr<Dim, 0> {
- template<typename InDim>
- static inline Dim getDim(InDim dims ) {return Dim(dims.TotalSize());}
-};
-
-template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
-struct FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{
- typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator;
- typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions;
- const Dimensions m_dimensions;
- const Dimensions& dimensions() const { return m_dimensions; }
- FunctorExtractor(const TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr)
- : m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {}
-};
-
-
-template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
-struct FunctorExtractor<TensorEvaluator<TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>
-: FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{};
-/// template deduction function for FunctorExtractor
-template <typename Evaluator>
-auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {
- return FunctorExtractor<Evaluator>(evaluator);
-}
-} // namespace internal
-} // namespace TensorSycl
-} // namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXTRACT_FUNCTORS_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
deleted file mode 100644
index 25d1fac..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclLeafCount.h
- *
- * \brief:
- * The leaf count used the pre-order expression tree traverse in order to name
- * count the number of leaf nodes in the expression
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-/// \brief LeafCount used to counting terminal nodes. The total number of
-/// leaf nodes is used by MakePlaceHolderExprHelper to find the order
-/// of the leaf node in a expression tree at compile time.
-template <typename Expr>
-struct LeafCount;
-
-template<typename... Args> struct CategoryCount;
-
-template<> struct CategoryCount<>
-{
- static const size_t Count =0;
-};
-
-template<typename Arg, typename... Args>
-struct CategoryCount<Arg,Args...>{
- static const size_t Count = LeafCount<Arg>::Count + CategoryCount<Args...>::Count;
-};
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorMap
-template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
-struct LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> > {
- static const size_t Count =1;
-};
-
-/// specialisation of the \ref LeafCount struct when the node type is TensorMap
-template <typename PlainObjectType, int Options_, template <class> class MakePointer_>
-struct LeafCount<TensorMap<PlainObjectType, Options_, MakePointer_> > :LeafCount<const TensorMap<PlainObjectType, Options_, MakePointer_> >{};
-
-// const TensorCwiseUnaryOp, const TensorCwiseNullaryOp, const TensorCwiseBinaryOp, const TensorCwiseTernaryOp, and Const TensorBroadcastingOp
-template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
-struct LeafCount<const CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {};
-// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp
-template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>
-struct LeafCount<CategoryExpr<OP, RHSExpr...> > :LeafCount<const CategoryExpr<OP, RHSExpr...> >{};
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorSelectOp is an exception
-template <typename IfExpr, typename ThenExpr, typename ElseExpr>
-struct LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {};
-/// specialisation of the \ref LeafCount struct when the node type is TensorSelectOp
-template <typename IfExpr, typename ThenExpr, typename ElseExpr>
-struct LeafCount<TensorSelectOp<IfExpr, ThenExpr, ElseExpr> >: LeafCount<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > {};
-
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorAssignOp
-template <typename LHSExpr, typename RHSExpr>
-struct LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {};
-
-/// specialisation of the \ref LeafCount struct when the node type is
-/// TensorAssignOp is an exception. It is not the same as Unary
-template <typename LHSExpr, typename RHSExpr>
-struct LeafCount<TensorAssignOp<LHSExpr, RHSExpr> > :LeafCount<const TensorAssignOp<LHSExpr, RHSExpr> >{};
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorForcedEvalOp
-template <typename Expr>
-struct LeafCount<const TensorForcedEvalOp<Expr> > {
- static const size_t Count =1;
-};
-
-/// specialisation of the \ref LeafCount struct when the node type is TensorForcedEvalOp
-template <typename Expr>
-struct LeafCount<TensorForcedEvalOp<Expr> >: LeafCount<const TensorForcedEvalOp<Expr> > {};
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorEvalToOp
-template <typename Expr>
-struct LeafCount<const TensorEvalToOp<Expr> > {
- static const size_t Count = 1 + CategoryCount<Expr>::Count;
-};
-
-/// specialisation of the \ref LeafCount struct when the node type is const TensorReductionOp
-template <typename OP, typename Dim, typename Expr>
-struct LeafCount<const TensorReductionOp<OP, Dim, Expr> > {
- static const size_t Count =1;
-};
-
-/// specialisation of the \ref LeafCount struct when the node type is TensorReductionOp
-template <typename OP, typename Dim, typename Expr>
-struct LeafCount<TensorReductionOp<OP, Dim, Expr> >: LeafCount<const TensorReductionOp<OP, Dim, Expr> >{};
-
-/// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp
-template <typename Expr>
-struct LeafCount<TensorEvalToOp<Expr> >: LeafCount<const TensorEvalToOp<Expr> >{};
-
-} /// namespace TensorSycl
-} /// namespace internal
-} /// namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_LEAF_COUNT_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
deleted file mode 100644
index d4c250c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
+++ /dev/null
@@ -1,181 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclPlaceHolderExpr.h
- *
- * \brief:
- * This is the specialisation of the placeholder expression based on the
- * operation type
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-namespace internal {
-
-/// \struct PlaceHolder
-/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression
-/// tree.
-/// PlaceHolder contains the order of the leaf node in the expression tree.
-template <typename Scalar, size_t N>
-struct PlaceHolder {
- static constexpr size_t I = N;
- typedef Scalar Type;
-};
-
-/// \sttruct PlaceHolderExpression
-/// \brief it is used to create the PlaceHolder expression. The PlaceHolder
-/// expression is a copy of expression type in which the TensorMap of the has
-/// been replaced with PlaceHolder.
-template <typename Expr, size_t N>
-struct PlaceHolderExpression;
-
-template<size_t N, typename... Args>
-struct CalculateIndex;
-
-template<size_t N, typename Arg>
-struct CalculateIndex<N, Arg>{
- typedef typename PlaceHolderExpression<Arg, N>::Type ArgType;
- typedef utility::tuple::Tuple<ArgType> ArgsTuple;
-};
-
-template<size_t N, typename Arg1, typename Arg2>
-struct CalculateIndex<N, Arg1, Arg2>{
- static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
- typedef typename PlaceHolderExpression<Arg1, N - Arg2LeafCount>::Type Arg1Type;
- typedef typename PlaceHolderExpression<Arg2, N>::Type Arg2Type;
- typedef utility::tuple::Tuple<Arg1Type, Arg2Type> ArgsTuple;
-};
-
-template<size_t N, typename Arg1, typename Arg2, typename Arg3>
-struct CalculateIndex<N, Arg1, Arg2, Arg3> {
- static const size_t Arg3LeafCount = LeafCount<Arg3>::Count;
- static const size_t Arg2LeafCount = LeafCount<Arg2>::Count;
- typedef typename PlaceHolderExpression<Arg1, N - Arg3LeafCount - Arg2LeafCount>::Type Arg1Type;
- typedef typename PlaceHolderExpression<Arg2, N - Arg3LeafCount>::Type Arg2Type;
- typedef typename PlaceHolderExpression<Arg3, N>::Type Arg3Type;
- typedef utility::tuple::Tuple<Arg1Type, Arg2Type, Arg3Type> ArgsTuple;
-};
-
-template<template<class...> class Category , class OP, class TPL>
-struct CategoryHelper;
-
-template<template<class...> class Category , class OP, class ...T >
-struct CategoryHelper<Category, OP, utility::tuple::Tuple<T...> > {
- typedef Category<OP, T... > Type;
-};
-
-template<template<class...> class Category , class ...T >
-struct CategoryHelper<Category, NoOP, utility::tuple::Tuple<T...> > {
- typedef Category<T... > Type;
-};
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorBroadcastingOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp
-#define OPEXPRCATEGORY(CVQual)\
-template <template <class, class... > class Category, typename OP, typename... SubExpr, size_t N>\
-struct PlaceHolderExpression<CVQual Category<OP, SubExpr...>, N>{\
- typedef CVQual typename CategoryHelper<Category, OP, typename CalculateIndex<N, SubExpr...>::ArgsTuple>::Type Type;\
-};
-
-OPEXPRCATEGORY(const)
-OPEXPRCATEGORY()
-#undef OPEXPRCATEGORY
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorCwiseSelectOp
-#define SELECTEXPR(CVQual)\
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, N> {\
- typedef CVQual typename CategoryHelper<TensorSelectOp, NoOP, typename CalculateIndex<N, IfExpr, ThenExpr, ElseExpr>::ArgsTuple>::Type Type;\
-};
-
-SELECTEXPR(const)
-SELECTEXPR()
-#undef SELECTEXPR
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorAssignOp
-#define ASSIGNEXPR(CVQual)\
-template <typename LHSExpr, typename RHSExpr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorAssignOp<LHSExpr, RHSExpr>, N> {\
- typedef CVQual typename CategoryHelper<TensorAssignOp, NoOP, typename CalculateIndex<N, LHSExpr, RHSExpr>::ArgsTuple>::Type Type;\
-};
-
-ASSIGNEXPR(const)
-ASSIGNEXPR()
-#undef ASSIGNEXPR
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorMap
-#define TENSORMAPEXPR(CVQual)\
-template <typename Scalar_, int Options_, int Options2_, int NumIndices_, typename IndexType_, template <class> class MakePointer_, size_t N>\
-struct PlaceHolderExpression< CVQual TensorMap< Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> {\
- typedef CVQual PlaceHolder<CVQual TensorMap<Tensor<Scalar_, NumIndices_, Options_, IndexType_>, Options2_, MakePointer_>, N> Type;\
-};
-
-TENSORMAPEXPR(const)
-TENSORMAPEXPR()
-#undef TENSORMAPEXPR
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorForcedEvalOp
-#define FORCEDEVAL(CVQual)\
-template <typename Expr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorForcedEvalOp<Expr>, N> {\
- typedef CVQual PlaceHolder<CVQual TensorForcedEvalOp<Expr>, N> Type;\
-};
-
-FORCEDEVAL(const)
-FORCEDEVAL()
-#undef FORCEDEVAL
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorEvalToOp
-#define EVALTO(CVQual)\
-template <typename Expr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorEvalToOp<Expr>, N> {\
- typedef CVQual TensorEvalToOp<typename CalculateIndex <N, Expr>::ArgType> Type;\
-};
-
-EVALTO(const)
-EVALTO()
-#undef EVALTO
-
-
-/// specialisation of the \ref PlaceHolderExpression when the node is
-/// TensorReductionOp
-#define SYCLREDUCTION(CVQual)\
-template <typename OP, typename Dims, typename Expr, size_t N>\
-struct PlaceHolderExpression<CVQual TensorReductionOp<OP, Dims, Expr>, N>{\
- typedef CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dims,Expr>, N> Type;\
-};
-SYCLREDUCTION(const)
-SYCLREDUCTION()
-#undef SYCLREDUCTION
-
-/// template deduction for \ref PlaceHolderExpression struct
-template <typename Expr>
-struct createPlaceHolderExpression {
- static const size_t TotalLeaves = LeafCount<Expr>::Count;
- typedef typename PlaceHolderExpression<Expr, TotalLeaves - 1>::Type Type;
-};
-
-} // internal
-} // TensorSycl
-} // namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_PLACEHOLDER_EXPR_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
deleted file mode 100644
index 7914b6f..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Cummins Chris PhD student at The University of Edinburgh.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensorSyclRun.h
- *
- * \brief:
- * Schedule_kernel invoke an specialised version of kernel struct. The
- * specialisation is based on the data dimension in sycl buffer
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
-
-namespace Eigen {
-namespace TensorSycl {
-/// The run function in tensor sycl convert the expression tree to a buffer
-/// based expression tree;
-/// creates the expression tree for the device with accessor to buffers;
-/// construct the kernel and submit it to the sycl queue.
-template <typename Expr, typename Dev>
-void run(Expr &expr, Dev &dev) {
- Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
- const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
- if (needs_assign) {
- typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
- auto functors = internal::extractFunctors(evaluator);
-
- size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
- dev.m_queue.submit([&](cl::sycl::handler &cgh) {
-
- // create a tuple of accessors from Evaluator
- auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
- const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
- size_t GRange=range;
- if (tileSize>GRange) tileSize=GRange;
- else if(GRange>tileSize){
- size_t xMode = GRange % tileSize;
- if (xMode != 0) GRange += (tileSize - xMode);
- }
- // run the kernel
- cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
- typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
- auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
- auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
- if (itemID.get_global_linear_id() < range) {
- device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
- }
- });
- });
- dev.m_queue.throw_asynchronous();
- }
-
- evaluator.cleanup();
-}
-} // namespace TensorSycl
-} // namespace Eigen
-
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
deleted file mode 100644
index 83915f3..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h
+++ /dev/null
@@ -1,237 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/*****************************************************************
- * TensroSyclTuple.h
- *
- * \brief:
- * Minimal implementation of std::tuple that can be used inside a SYCL kernel.
- *
-*****************************************************************/
-
-#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
-#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
-namespace utility {
-namespace tuple {
-/// \struct StaticIf
-/// \brief The StaticIf struct is used to statically choose the type based on the
-/// condition.
-template <bool, typename T = void> struct StaticIf;
-/// \brief specialisation of the \ref StaticIf when the condition is true
-template <typename T>
-struct StaticIf<true, T> {
- typedef T type;
-};
-
-/// \struct Tuple
-/// \brief is a fixed-size collection of heterogeneous values
-/// \tparam Ts... - the types of the elements that the tuple stores.
-/// Empty list is supported.
-template <class... Ts>
-struct Tuple {};
-
-/// \brief specialisation of the \ref Tuple class when the tuple has at least
-/// one element.
-/// \tparam T : the type of the first element in the tuple.
-/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
-template <class T, class... Ts>
-struct Tuple<T, Ts...> {
- Tuple(T t, Ts... ts) : head(t), tail(ts...) {}
- T head;
- Tuple<Ts...> tail;
-};
-
-///\ struct ElemTypeHolder
-/// \brief ElemTypeHolder class is used to specify the types of the
-/// elements inside the tuple
-/// \tparam size_t the number of elements inside the tuple
-/// \tparam class the tuple class
-template <size_t, class>
-struct ElemTypeHolder;
-
-/// \brief specialisation of the \ref ElemTypeHolder class when the number of
-/// elements inside the tuple is 1
-template <class T, class... Ts>
-struct ElemTypeHolder<0, Tuple<T, Ts...> > {
- typedef T type;
-};
-
-/// \brief specialisation of the \ref ElemTypeHolder class when the number of
-/// elements inside the tuple is bigger than 1. It recursively calls itself to
-/// detect the type of each element in the tuple
-/// \tparam T : the type of the first element in the tuple.
-/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty.
-/// \tparam K is the Kth element in the tuple
-template <size_t k, class T, class... Ts>
-struct ElemTypeHolder<k, Tuple<T, Ts...> > {
- typedef typename ElemTypeHolder<k - 1, Tuple<Ts...> >::type type;
-};
-
-/// get
-/// \brief Extracts the first element from the tuple.
-/// K=0 represents the first element of the tuple. The tuple cannot be empty.
-/// \tparam Ts... are the type of the elements in the tuple.
-/// \param t is the tuple whose contents to extract
-/// \return typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type
-
-#define TERMINATE_CONDS_TUPLE_GET(CVQual) \
-template <size_t k, class... Ts> \
-typename StaticIf<k == 0, CVQual typename ElemTypeHolder<0, Tuple<Ts...> >::type &>::type \
-get(CVQual Tuple<Ts...> &t) { \
- static_assert(sizeof...(Ts)!=0, "The requseted value is bigger than the size of the tuple"); \
- return t.head; \
-}
-
-TERMINATE_CONDS_TUPLE_GET(const)
-TERMINATE_CONDS_TUPLE_GET()
-#undef TERMINATE_CONDS_TUPLE_GET
-/// get
-/// \brief Extracts the Kth element from the tuple.
-///\tparam K is an integer value in [0,sizeof...(Types)).
-/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple
-/// \tparam Ts... are the type of the elements in the tuple.
-/// \param t is the tuple whose contents to extract
-/// \return typename ElemTypeHolder<K, Tuple<Ts...> >::type &>::type
-#define RECURSIVE_TUPLE_GET(CVQual) \
-template <size_t k, class T, class... Ts> \
-typename StaticIf<k != 0, CVQual typename ElemTypeHolder<k, Tuple<T, Ts...> >::type &>::type \
-get(CVQual Tuple<T, Ts...> &t) { \
- return utility::tuple::get<k - 1>(t.tail); \
-}
-RECURSIVE_TUPLE_GET(const)
-RECURSIVE_TUPLE_GET()
-#undef RECURSIVE_TUPLE_GET
-
-/// make_tuple
-/// \brief Creates a tuple object, deducing the target type from the types of
-/// arguments.
-/// \tparam Args the type of the arguments to construct the tuple from
-/// \param args zero or more arguments to construct the tuple from
-/// \return Tuple<Args...>
-template <typename... Args>
-Tuple<Args...> make_tuple(Args... args) {
- return Tuple<Args...>(args...);
-}
-
-/// size
-/// \brief Provides access to the number of elements in a tuple as a
-/// compile-time constant expression.
-/// \tparam Args the type of the arguments to construct the tuple from
-/// \return size_t
-template <typename... Args>
-static constexpr size_t size(Tuple<Args...> &) {
- return sizeof...(Args);
-}
-
-/// \struct IndexList
-/// \brief Creates a list of index from the elements in the tuple
-/// \tparam Is... a list of index from [0 to sizeof...(tuple elements))
-template <size_t... Is>
-struct IndexList {};
-
-/// \struct RangeBuilder
-/// \brief Collects internal details for generating index ranges [MIN, MAX)
-/// Declare primary template for index range builder
-/// \tparam MIN is the starting index in the tuple
-/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
-/// \tparam Is... are the list of generated index so far
-template <size_t MIN, size_t N, size_t... Is>
-struct RangeBuilder;
-
-// FIXME Doxygen has problems with recursive inheritance
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-/// \brief base Step: Specialisation of the \ref RangeBuilder when the
-/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements))
-/// \tparam MIN is the starting index of the tuple
-/// \tparam Is is [0 to sizeof...(tuple elements))
-template <size_t MIN, size_t... Is>
-struct RangeBuilder<MIN, MIN, Is...> {
- typedef IndexList<Is...> type;
-};
-
-/// Induction step: Specialisation of the RangeBuilder class when N!=MIN
-/// in this case we are recursively subtracting N by one and adding one
-/// index to Is... list until MIN==N
-/// \tparam MIN is the starting index in the tuple
-/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
-/// \tparam Is... are the list of generated index so far
-template <size_t MIN, size_t N, size_t... Is>
-struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};
-#endif // EIGEN_PARSED_BY_DOXYGEN
-
-/// \brief IndexRange that returns a [MIN, MAX) index range
-/// \tparam MIN is the starting index in the tuple
-/// \tparam MAX is the size of the tuple
-template <size_t MIN, size_t MAX>
-struct IndexRange: RangeBuilder<MIN, MAX>::type {};
-
-/// append_base
-/// \brief unpacking the elements of the input tuple t and creating a new tuple
-/// by adding element a at the end of it.
-///\tparam Args... the type of the elements inside the tuple t
-/// \tparam T the type of the new element going to be added at the end of tuple
-/// \tparam I... is the list of index from [0 to sizeof...(t))
-/// \param t the tuple on which we want to append a.
-/// \param a the new elements going to be added to the tuple
-/// \return Tuple<Args..., T>
-template <typename... Args, typename T, size_t... I>
-Tuple<Args..., T> append_base(Tuple<Args...> t, T a,IndexList<I...>) {
- return utility::tuple::make_tuple(get<I>(t)..., a);
-}
-
-/// append
-/// \brief the deduction function for \ref append_base that automatically
-/// generate the \ref IndexRange
-///\tparam Args... the type of the elements inside the tuple t
-/// \tparam T the type of the new element going to be added at the end of tuple
-/// \param t the tuple on which we want to append a.
-/// \param a the new elements going to be added to the tuple
-/// \return Tuple<Args..., T>
-template <typename... Args, typename T>
-Tuple<Args..., T> append(Tuple<Args...> t, T a) {
- return utility::tuple::append_base(t, a, IndexRange<0, sizeof...(Args)>());
-}
-
-/// append_base
-/// \brief This is a specialisation of \ref append_base when we want to
-/// concatenate
-/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate the
-/// IndexRange for each of them and create an output tuple T that contains both
-/// elements of t1 and t2.
-///\tparam Args1... the type of the elements inside the tuple t1
-///\tparam Args2... the type of the elements inside the tuple t2
-/// \tparam I1... is the list of index from [0 to sizeof...(t1))
-/// \tparam I2... is the list of index from [0 to sizeof...(t2))
-/// \param t1 is the tuple on which we want to append t2.
-/// \param t2 is the tuple that is going to be added on t1.
-/// \return Tuple<Args1..., Args2...>
-template <typename... Args1, typename... Args2, size_t... I1, size_t... I2>
-Tuple<Args1..., Args2...> append_base(Tuple<Args1...> t1, Tuple<Args2...> t2, IndexList<I1...>, IndexList<I2...>) {
- return utility::tuple::make_tuple(get<I1>(t1)...,get<I2>(t2)...);
-}
-
-/// append
-/// \brief deduction function for \ref append_base when we are appending tuple
-/// t1 by tuple t2. In this case the \ref IndexRange for both tuple are
-/// automatically generated.
-///\tparam Args1... the type of the elements inside the tuple t1
-///\tparam Args2... the type of the elements inside the tuple t2
-/// \param t1 is the tuple on which we want to append t2.
-/// \param t2 is the tuple that is going to be added on t1.
-/// \return Tuple<Args1..., Args2...>
-template <typename... Args1, typename... Args2>
-Tuple<Args1..., Args2...> append(Tuple<Args1...> t1,Tuple<Args2...> t2) {
- return utility::tuple::append_base(t1, t2, IndexRange<0, sizeof...(Args1)>(), IndexRange<0, sizeof...(Args2)>());
-}
-} // tuple
-} // utility
-#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_TUPLE_HPP
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
deleted file mode 100644
index ffcf8b0..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ /dev/null
@@ -1,272 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
-#define EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
-
-namespace Eigen {
-namespace internal {
-
-
-template<typename Scalar, int Options>
-class compute_tensor_flags
-{
- enum {
- is_dynamic_size_storage = 1,
-
- is_aligned =
- (
- ((Options&DontAlign)==0) && (
-#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
- (!is_dynamic_size_storage)
-#else
- 0
-#endif
- |
-#if EIGEN_MAX_ALIGN_BYTES>0
- is_dynamic_size_storage
-#else
- 0
-#endif
- )
- ),
- packet_access_bit = packet_traits<Scalar>::Vectorizable && is_aligned ? PacketAccessBit : 0
- };
-
- public:
- enum { ret = packet_access_bit };
-};
-
-
-template<typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
-struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
- typedef Scalar_ Scalar;
- typedef Dense StorageKind;
- typedef IndexType_ Index;
- static const int NumDimensions = NumIndices_;
- static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
- enum {
- Options = Options_,
- Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
- };
- template <typename T> struct MakePointer {
- typedef T* Type;
- };
-};
-
-
-template<typename Scalar_, typename Dimensions, int Options_, typename IndexType_>
-struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
-{
- typedef Scalar_ Scalar;
- typedef Dense StorageKind;
- typedef IndexType_ Index;
- static const int NumDimensions = array_size<Dimensions>::value;
- static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
- enum {
- Options = Options_,
- Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
- };
- template <typename T> struct MakePointer {
- typedef T* Type;
- };
-};
-
-
-template<typename PlainObjectType, int Options_, template <class> class MakePointer_>
-struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
- : public traits<PlainObjectType>
-{
- typedef traits<PlainObjectType> BaseTraits;
- typedef typename BaseTraits::Scalar Scalar;
- typedef typename BaseTraits::StorageKind StorageKind;
- typedef typename BaseTraits::Index Index;
- static const int NumDimensions = BaseTraits::NumDimensions;
- static const int Layout = BaseTraits::Layout;
- enum {
- Options = Options_,
- Flags = BaseTraits::Flags
- };
- template <class T> struct MakePointer {
- // Intermediate typedef to workaround MSVC issue.
- typedef MakePointer_<T> MakePointerT;
- typedef typename MakePointerT::Type Type;
- };
-};
-
-template<typename PlainObjectType>
-struct traits<TensorRef<PlainObjectType> >
- : public traits<PlainObjectType>
-{
- typedef traits<PlainObjectType> BaseTraits;
- typedef typename BaseTraits::Scalar Scalar;
- typedef typename BaseTraits::StorageKind StorageKind;
- typedef typename BaseTraits::Index Index;
- static const int NumDimensions = BaseTraits::NumDimensions;
- static const int Layout = BaseTraits::Layout;
- enum {
- Options = BaseTraits::Options,
- Flags = BaseTraits::Flags
- };
-};
-
-
-template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
-struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
-{
- typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
-};
-
-template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
-struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
-{
- typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
-};
-
-template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
-{
- typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
-{
- typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template<typename PlainObjectType, int Options, template <class> class MakePointer>
-struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
-{
- typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
-};
-
-template<typename PlainObjectType, int Options, template <class> class MakePointer>
-struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
-{
- typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
-};
-
-template<typename PlainObjectType>
-struct eval<TensorRef<PlainObjectType>, Eigen::Dense>
-{
- typedef const TensorRef<PlainObjectType>& type;
-};
-
-template<typename PlainObjectType>
-struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
-{
- typedef const TensorRef<PlainObjectType>& type;
-};
-
-// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
-template<typename T, int n=1, typename PlainObject = void> struct nested
-{
- typedef typename ref_selector<T>::type type;
-};
-
-template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
-struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
- typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
-};
-
-template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
-struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
-{
- typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
-};
-
-template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
-{
- typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
-struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
-{
- typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
-};
-
-
-template <typename PlainObjectType, int Options, template <class> class MakePointer>
-struct nested<TensorMap<PlainObjectType, Options, MakePointer> >
-{
- typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
-};
-
-template <typename PlainObjectType, int Options, template <class> class MakePointer>
-struct nested<const TensorMap<PlainObjectType, Options, MakePointer> >
-{
- typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
-};
-
-template <typename PlainObjectType>
-struct nested<TensorRef<PlainObjectType> >
-{
- typedef const TensorRef<PlainObjectType>& type;
-};
-
-template <typename PlainObjectType>
-struct nested<const TensorRef<PlainObjectType> >
-{
- typedef const TensorRef<PlainObjectType>& type;
-};
-
-} // end namespace internal
-
-// Convolutional layers take in an input tensor of shape (D, R, C, B), or (D, C,
-// R, B), and convolve it with a set of filters, which can also be presented as
-// a tensor (D, K, K, M), where M is the number of filters, K is the filter
-// size, and each 3-dimensional tensor of size (D, K, K) is a filter. For
-// simplicity we assume that we always use square filters (which is usually the
-// case in images), hence the two Ks in the tensor dimension. It also takes in
-// a few additional parameters:
-// Stride (S): The convolution stride is the offset between locations where we
-// apply the filters. A larger stride means that the output will be
-// spatially smaller.
-// Padding (P): The padding we apply to the input tensor along the R and C
-// dimensions. This is usually used to make sure that the spatial
-// dimensions of the output matches our intention.
-//
-// Two types of padding are often used:
-// SAME: The pad value is computed so that the output will have size
-// R/S and C/S.
-// VALID: no padding is carried out.
-// When we do padding, the padded values at the padded locations are usually
-// zero.
-//
-// The output dimensions for convolution, when given all the parameters above,
-// are as follows:
-// When Padding = SAME: the output size is (B, R', C', M), where
-// R' = ceil(float(R) / float(S))
-// C' = ceil(float(C) / float(S))
-// where ceil is the ceiling function. The input tensor is padded with 0 as
-// needed. The number of padded rows and columns are computed as:
-// Pr = ((R' - 1) * S + K - R) / 2
-// Pc = ((C' - 1) * S + K - C) / 2
-// when the stride is 1, we have the simplified case R'=R, C'=C, Pr=Pc=(K-1)/2.
-// This is where SAME comes from - the output has the same size as the input has.
-// When Padding = VALID: the output size is computed as
-// R' = ceil(float(R - K + 1) / float(S))
-// C' = ceil(float(C - K + 1) / float(S))
-// and the number of padded rows and columns are computed in the same way as in
-// the SAME case.
-// When the stride is 1, we have the simplified case R'=R-K+1, C'=C-K+1, Pr=0,
-// Pc=0.
-typedef enum {
- PADDING_VALID = 1,
- PADDING_SAME = 2
-} PaddingType;
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_TRAITS_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
deleted file mode 100644
index 3523e7c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ /dev/null
@@ -1,248 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
-#define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
-
-namespace Eigen {
-namespace internal {
-
-
-template <uint64_t n>
-struct static_val {
- static const uint64_t value = n;
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
-
- template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
- eigen_assert(v == n);
- }
-};
-
-
-template <typename HIGH = uint64_t, typename LOW = uint64_t>
-struct TensorUInt128
-{
- HIGH high;
- LOW low;
-
- template<typename OTHER_HIGH, typename OTHER_LOW>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
- EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
- }
-
- template<typename OTHER_HIGH, typename OTHER_LOW>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) {
- EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
- high = other.high;
- low = other.low;
- return *this;
- }
-
- template<typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- explicit TensorUInt128(const T& x) : high(0), low(x) {
- eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
- eigen_assert(x >= 0);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TensorUInt128(HIGH y, LOW x) : high(y), low(x) { }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
- return low;
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
- return low;
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
- return high;
- }
-};
-
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- return (lhs.high == rhs.high) & (lhs.low == rhs.low);
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- return (lhs.high != rhs.high) | (lhs.low != rhs.low);
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- if (lhs.high != rhs.high) {
- return lhs.high > rhs.high;
- }
- return lhs.low >= rhs.low;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- if (lhs.high != rhs.high) {
- return lhs.high < rhs.high;
- }
- return lhs.low < rhs.low;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
- if (result.low < rhs.low) {
- result.high += 1;
- }
- return result;
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
- if (result.low > lhs.low) {
- result.high -= 1;
- }
- return result;
-}
-
-
-template <typename HL, typename LL, typename HR, typename LR>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- // Split each 128-bit integer into 4 32-bit integers, and then do the
- // multiplications by hand as follow:
- // lhs a b c d
- // rhs e f g h
- // -----------
- // ah bh ch dh
- // bg cg dg
- // cf df
- // de
- // The result is stored in 2 64bit integers, high and low.
-
- const uint64_t LOW = 0x00000000FFFFFFFFLL;
- const uint64_t HIGH = 0xFFFFFFFF00000000LL;
-
- uint64_t d = lhs.low & LOW;
- uint64_t c = (lhs.low & HIGH) >> 32LL;
- uint64_t b = lhs.high & LOW;
- uint64_t a = (lhs.high & HIGH) >> 32LL;
-
- uint64_t h = rhs.low & LOW;
- uint64_t g = (rhs.low & HIGH) >> 32LL;
- uint64_t f = rhs.high & LOW;
- uint64_t e = (rhs.high & HIGH) >> 32LL;
-
- // Compute the low 32 bits of low
- uint64_t acc = d * h;
- uint64_t low = acc & LOW;
- // Compute the high 32 bits of low. Add a carry every time we wrap around
- acc >>= 32LL;
- uint64_t carry = 0;
- uint64_t acc2 = acc + c * h;
- if (acc2 < acc) {
- carry++;
- }
- acc = acc2 + d * g;
- if (acc < acc2) {
- carry++;
- }
- low |= (acc << 32LL);
-
- // Carry forward the high bits of acc to initiate the computation of the
- // low 32 bits of high
- acc2 = (acc >> 32LL) | (carry << 32LL);
- carry = 0;
-
- acc = acc2 + b * h;
- if (acc < acc2) {
- carry++;
- }
- acc2 = acc + c * g;
- if (acc2 < acc) {
- carry++;
- }
- acc = acc2 + d * f;
- if (acc < acc2) {
- carry++;
- }
- uint64_t high = acc & LOW;
-
- // Start to compute the high 32 bits of high.
- acc2 = (acc >> 32LL) | (carry << 32LL);
-
- acc = acc2 + a * h;
- acc2 = acc + b * g;
- acc = acc2 + c * f;
- acc2 = acc + d * e;
- high |= (acc2 << 32LL);
-
- return TensorUInt128<uint64_t, uint64_t>(high, low);
-}
-
-template <typename HL, typename LL, typename HR, typename LR>
-static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
-{
- if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
- return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
- } else if (lhs < rhs) {
- return TensorUInt128<uint64_t, uint64_t>(0);
- } else {
- // calculate the biggest power of 2 times rhs that's less than or equal to lhs
- TensorUInt128<uint64_t, uint64_t> power2(1);
- TensorUInt128<uint64_t, uint64_t> d(rhs);
- TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
- while (lhs >= d) {
- tmp = tmp - d;
- d = d + d;
- power2 = power2 + power2;
- }
-
- tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
- TensorUInt128<uint64_t, uint64_t> result(0);
- while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
- if (tmp >= d) {
- tmp = tmp - d;
- result = result + power2;
- }
- // Shift right
- power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
- d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
- }
-
- return result;
- }
-}
-
-
-} // namespace internal
-} // namespace Eigen
-
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
deleted file mode 100644
index 0ca2cac..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
+++ /dev/null
@@ -1,608 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
-#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
-
-namespace Eigen {
-
-/** \class TensorVolumePatch
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Patch extraction specialized for processing of volumetric data.
- * This assumes that the input has a least 4 dimensions ordered as follows:
- * - channels
- * - planes
- * - rows
- * - columns
- * - (optional) additional dimensions such as time or batch size.
- * Calling the volume patch code with patch_planes, patch_rows, and patch_cols
- * is equivalent to calling the regular patch extraction code with parameters
- * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional
- * dimensions.
- */
-namespace internal {
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
-{
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef traits<XprType> XprTraits;
- typedef typename XprTraits::StorageKind StorageKind;
- typedef typename XprTraits::Index Index;
- typedef typename XprType::Nested Nested;
- typedef typename remove_reference<Nested>::type _Nested;
- static const int NumDimensions = XprTraits::NumDimensions + 1;
- static const int Layout = XprTraits::Layout;
-};
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense>
-{
- typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type;
-};
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type>
-{
- typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type;
-};
-
-} // end namespace internal
-
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
-class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors>
-{
- public:
- typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
- DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
- DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
- DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
- PaddingType padding_type, Scalar padding_value)
- : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
- m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
- m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
- m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
- m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
- m_padding_type(padding_type), m_padding_value(padding_value) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
- DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
- DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
- DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
- DenseIndex padding_top_z, DenseIndex padding_bottom_z,
- DenseIndex padding_top, DenseIndex padding_bottom,
- DenseIndex padding_left, DenseIndex padding_right,
- Scalar padding_value)
- : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
- m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
- m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
- m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
- m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
- m_padding_left(padding_left), m_padding_right(padding_right),
- m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
-
- EIGEN_DEVICE_FUNC
- DenseIndex patch_planes() const { return m_patch_planes; }
- EIGEN_DEVICE_FUNC
- DenseIndex patch_rows() const { return m_patch_rows; }
- EIGEN_DEVICE_FUNC
- DenseIndex patch_cols() const { return m_patch_cols; }
- EIGEN_DEVICE_FUNC
- DenseIndex plane_strides() const { return m_plane_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex row_strides() const { return m_row_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex col_strides() const { return m_col_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex in_plane_strides() const { return m_in_plane_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex in_row_strides() const { return m_in_row_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex in_col_strides() const { return m_in_col_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
- EIGEN_DEVICE_FUNC
- DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
- EIGEN_DEVICE_FUNC
- bool padding_explicit() const { return m_padding_explicit; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_top_z() const { return m_padding_top_z; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_bottom_z() const { return m_padding_bottom_z; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_top() const { return m_padding_top; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_bottom() const { return m_padding_bottom; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_left() const { return m_padding_left; }
- EIGEN_DEVICE_FUNC
- DenseIndex padding_right() const { return m_padding_right; }
- EIGEN_DEVICE_FUNC
- PaddingType padding_type() const { return m_padding_type; }
- EIGEN_DEVICE_FUNC
- Scalar padding_value() const { return m_padding_value; }
-
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- protected:
- typename XprType::Nested m_xpr;
- const DenseIndex m_patch_planes;
- const DenseIndex m_patch_rows;
- const DenseIndex m_patch_cols;
- const DenseIndex m_plane_strides;
- const DenseIndex m_row_strides;
- const DenseIndex m_col_strides;
- const DenseIndex m_in_plane_strides;
- const DenseIndex m_in_row_strides;
- const DenseIndex m_in_col_strides;
- const DenseIndex m_plane_inflate_strides;
- const DenseIndex m_row_inflate_strides;
- const DenseIndex m_col_inflate_strides;
- const bool m_padding_explicit;
- const DenseIndex m_padding_top_z;
- const DenseIndex m_padding_bottom_z;
- const DenseIndex m_padding_top;
- const DenseIndex m_padding_bottom;
- const DenseIndex m_padding_left;
- const DenseIndex m_padding_right;
- const PaddingType m_padding_type;
- const Scalar m_padding_value;
-};
-
-
-// Eval as rvalue
-template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
-struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
-{
- typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
- typedef typename XprType::Index Index;
- static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- static const int NumDims = NumInputDims + 1;
- typedef DSizes<Index, NumDims> Dimensions;
- typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
- enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false,
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_impl(op.expression(), device)
- {
- EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- m_paddingValue = op.padding_value();
-
- const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-
- // Cache a few variables.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputDepth = input_dims[0];
- m_inputPlanes = input_dims[1];
- m_inputRows = input_dims[2];
- m_inputCols = input_dims[3];
- } else {
- m_inputDepth = input_dims[NumInputDims-1];
- m_inputPlanes = input_dims[NumInputDims-2];
- m_inputRows = input_dims[NumInputDims-3];
- m_inputCols = input_dims[NumInputDims-4];
- }
-
- m_plane_strides = op.plane_strides();
- m_row_strides = op.row_strides();
- m_col_strides = op.col_strides();
-
- // Input strides and effective input/patch size
- m_in_plane_strides = op.in_plane_strides();
- m_in_row_strides = op.in_row_strides();
- m_in_col_strides = op.in_col_strides();
- m_plane_inflate_strides = op.plane_inflate_strides();
- m_row_inflate_strides = op.row_inflate_strides();
- m_col_inflate_strides = op.col_inflate_strides();
-
- // The "effective" spatial size after inflating data with zeros.
- m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1;
- m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
- m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
- m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1);
- m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
- m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
-
- if (op.padding_explicit()) {
- m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
- m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
- m_planePaddingTop = op.padding_top_z();
- m_rowPaddingTop = op.padding_top();
- m_colPaddingLeft = op.padding_left();
- } else {
- // Computing padding from the type
- switch (op.padding_type()) {
- case PADDING_VALID:
- m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
- m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
- m_planePaddingTop = 0;
- m_rowPaddingTop = 0;
- m_colPaddingLeft = 0;
- break;
- case PADDING_SAME: {
- m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast<float>(m_plane_strides));
- m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
- m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
- const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff;
- const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff;
- const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff;
- m_planePaddingTop = dz - dz / 2;
- m_rowPaddingTop = dy - dy / 2;
- m_colPaddingLeft = dx - dx / 2;
- break;
- }
- default:
- eigen_assert(false && "unexpected padding");
- }
- }
- eigen_assert(m_outputRows > 0);
- eigen_assert(m_outputCols > 0);
- eigen_assert(m_outputPlanes > 0);
-
- // Dimensions for result of extraction.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- // ColMajor
- // 0: depth
- // 1: patch_planes
- // 2: patch_rows
- // 3: patch_cols
- // 4: number of patches
- // 5 and beyond: anything else (such as batch).
- m_dimensions[0] = input_dims[0];
- m_dimensions[1] = op.patch_planes();
- m_dimensions[2] = op.patch_rows();
- m_dimensions[3] = op.patch_cols();
- m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols;
- for (int i = 5; i < NumDims; ++i) {
- m_dimensions[i] = input_dims[i-1];
- }
- } else {
- // RowMajor
- // NumDims-1: depth
- // NumDims-2: patch_planes
- // NumDims-3: patch_rows
- // NumDims-4: patch_cols
- // NumDims-5: number of patches
- // NumDims-6 and beyond: anything else (such as batch).
- m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
- m_dimensions[NumDims-2] = op.patch_planes();
- m_dimensions[NumDims-3] = op.patch_rows();
- m_dimensions[NumDims-4] = op.patch_cols();
- m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols;
- for (int i = NumDims-6; i >= 0; --i) {
- m_dimensions[i] = input_dims[i];
- }
- }
-
- // Strides for the output tensor.
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_rowStride = m_dimensions[1];
- m_colStride = m_dimensions[2] * m_rowStride;
- m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0];
- m_otherStride = m_patchStride * m_dimensions[4];
- } else {
- m_rowStride = m_dimensions[NumDims-2];
- m_colStride = m_dimensions[NumDims-3] * m_rowStride;
- m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1];
- m_otherStride = m_patchStride * m_dimensions[NumDims-5];
- }
-
- // Strides for navigating through the input tensor.
- m_planeInputStride = m_inputDepth;
- m_rowInputStride = m_inputDepth * m_inputPlanes;
- m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes;
- m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes;
-
- m_outputPlanesRows = m_outputPlanes * m_outputRows;
-
- // Fast representations of different variables.
- m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
- m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
- m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
- m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
- m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
- m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
- m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides);
- m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
- m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes);
- m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows);
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
- } else {
- m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
- m_impl.evalSubExprsIfNeeded(NULL);
- return true;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_impl.cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- // Patch index corresponding to the passed in index.
- const Index patchIndex = index / m_fastPatchStride;
-
- // Spatial offset within the patch. This has to be translated into 3D
- // coordinates within the patch.
- const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
-
- // Batch, etc.
- const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride;
- const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
-
- // Calculate column index in the input original tensor.
- const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
- const Index colOffset = patchOffset / m_fastColStride;
- const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
- const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
- if (inputCol < 0 || inputCol >= m_input_cols_eff ||
- ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
- return Scalar(m_paddingValue);
- }
-
- // Calculate row index in the original input tensor.
- const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
- const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride;
- const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
- const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
- if (inputRow < 0 || inputRow >= m_input_rows_eff ||
- ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
- return Scalar(m_paddingValue);
- }
-
- // Calculate plane index in the original input tensor.
- const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
- const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
- const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
- const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
- if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
- ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
- return Scalar(m_paddingValue);
- }
-
- const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
- const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
-
- const Index inputIndex = depth +
- origInputRow * m_rowInputStride +
- origInputCol * m_colInputStride +
- origInputPlane * m_planeInputStride +
- otherIndex * m_otherInputStride;
-
- return m_impl.coeff(inputIndex);
- }
-
- template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
-
- if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
- m_in_plane_strides != 1 || m_plane_inflate_strides != 1) {
- return packetWithPossibleZero(index);
- }
-
- const Index indices[2] = {index, index + PacketSize - 1};
- const Index patchIndex = indices[0] / m_fastPatchStride;
- if (patchIndex != indices[1] / m_fastPatchStride) {
- return packetWithPossibleZero(index);
- }
- const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride;
- eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
-
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
- (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
-
- const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
- eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
-
- const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
- const Index colOffsets[2] = {
- patchOffsets[0] / m_fastColStride,
- patchOffsets[1] / m_fastColStride};
-
- // Calculate col indices in the original input tensor.
- const Index inputCols[2] = {
- colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
- colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
- if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
- return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
- }
-
- if (inputCols[0] != inputCols[1]) {
- return packetWithPossibleZero(index);
- }
-
- const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
- const Index rowOffsets[2] = {
- (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
- (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
- eigen_assert(rowOffsets[0] <= rowOffsets[1]);
- // Calculate col indices in the original input tensor.
- const Index inputRows[2] = {
- rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
- rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
-
- if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
- return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
- }
-
- if (inputRows[0] != inputRows[1]) {
- return packetWithPossibleZero(index);
- }
-
- const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
- const Index planeOffsets[2] = {
- patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
- patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
- eigen_assert(planeOffsets[0] <= planeOffsets[1]);
- const Index inputPlanes[2] = {
- planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
- planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};
-
- if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
- return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
- }
-
- if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
- // no padding
- const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
- const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
- const Index inputIndex = depth +
- inputRows[0] * m_rowInputStride +
- inputCols[0] * m_colInputStride +
- m_planeInputStride * inputPlanes[0] +
- otherIndex * m_otherInputStride;
- return m_impl.template packet<Unaligned>(inputIndex);
- }
-
- return packetWithPossibleZero(index);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
- costPerCoeff(bool vectorized) const {
- const double compute_cost =
- 10 * TensorOpCost::DivCost<Index>() + 21 * TensorOpCost::MulCost<Index>() +
- 8 * TensorOpCost::AddCost<Index>();
- return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- Index planePaddingTop() const { return m_planePaddingTop; }
- Index rowPaddingTop() const { return m_rowPaddingTop; }
- Index colPaddingLeft() const { return m_colPaddingLeft; }
- Index outputPlanes() const { return m_outputPlanes; }
- Index outputRows() const { return m_outputRows; }
- Index outputCols() const { return m_outputCols; }
- Index userPlaneStride() const { return m_plane_strides; }
- Index userRowStride() const { return m_row_strides; }
- Index userColStride() const { return m_col_strides; }
- Index userInPlaneStride() const { return m_in_plane_strides; }
- Index userInRowStride() const { return m_in_row_strides; }
- Index userInColStride() const { return m_in_col_strides; }
- Index planeInflateStride() const { return m_plane_inflate_strides; }
- Index rowInflateStride() const { return m_row_inflate_strides; }
- Index colInflateStride() const { return m_col_inflate_strides; }
-
- protected:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
- {
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = coeff(index+i);
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
-
- Dimensions m_dimensions;
-
- // Parameters passed to the costructor.
- Index m_plane_strides;
- Index m_row_strides;
- Index m_col_strides;
-
- Index m_outputPlanes;
- Index m_outputRows;
- Index m_outputCols;
-
- Index m_planePaddingTop;
- Index m_rowPaddingTop;
- Index m_colPaddingLeft;
-
- Index m_in_plane_strides;
- Index m_in_row_strides;
- Index m_in_col_strides;
-
- Index m_plane_inflate_strides;
- Index m_row_inflate_strides;
- Index m_col_inflate_strides;
-
- // Cached input size.
- Index m_inputDepth;
- Index m_inputPlanes;
- Index m_inputRows;
- Index m_inputCols;
-
- // Other cached variables.
- Index m_outputPlanesRows;
-
- // Effective input/patch post-inflation size.
- Index m_input_planes_eff;
- Index m_input_rows_eff;
- Index m_input_cols_eff;
- Index m_patch_planes_eff;
- Index m_patch_rows_eff;
- Index m_patch_cols_eff;
-
- // Strides for the output tensor.
- Index m_otherStride;
- Index m_patchStride;
- Index m_rowStride;
- Index m_colStride;
-
- // Strides for the input tensor.
- Index m_planeInputStride;
- Index m_rowInputStride;
- Index m_colInputStride;
- Index m_otherInputStride;
-
- internal::TensorIntDivisor<Index> m_fastOtherStride;
- internal::TensorIntDivisor<Index> m_fastPatchStride;
- internal::TensorIntDivisor<Index> m_fastColStride;
- internal::TensorIntDivisor<Index> m_fastRowStride;
- internal::TensorIntDivisor<Index> m_fastInputPlaneStride;
- internal::TensorIntDivisor<Index> m_fastInputRowStride;
- internal::TensorIntDivisor<Index> m_fastInputColStride;
- internal::TensorIntDivisor<Index> m_fastInputColsEff;
- internal::TensorIntDivisor<Index> m_fastOutputPlanesRows;
- internal::TensorIntDivisor<Index> m_fastOutputPlanes;
- internal::TensorIntDivisor<Index> m_fastOutputDepth;
-
- Scalar m_paddingValue;
-
- TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h
deleted file mode 100644
index bc4f202..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h
+++ /dev/null
@@ -1,293 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-
-namespace Eigen {
-
-class DynamicSGroup
-{
- public:
- inline explicit DynamicSGroup() : m_numIndices(1), m_elements(), m_generators(), m_globalFlags(0) { m_elements.push_back(ge(Generator(0, 0, 0))); }
- inline DynamicSGroup(const DynamicSGroup& o) : m_numIndices(o.m_numIndices), m_elements(o.m_elements), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { }
- inline DynamicSGroup(DynamicSGroup&& o) : m_numIndices(o.m_numIndices), m_elements(), m_generators(o.m_generators), m_globalFlags(o.m_globalFlags) { std::swap(m_elements, o.m_elements); }
- inline DynamicSGroup& operator=(const DynamicSGroup& o) { m_numIndices = o.m_numIndices; m_elements = o.m_elements; m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; }
- inline DynamicSGroup& operator=(DynamicSGroup&& o) { m_numIndices = o.m_numIndices; std::swap(m_elements, o.m_elements); m_generators = o.m_generators; m_globalFlags = o.m_globalFlags; return *this; }
-
- void add(int one, int two, int flags = 0);
-
- template<typename Gen_>
- inline void add(Gen_) { add(Gen_::One, Gen_::Two, Gen_::Flags); }
- inline void addSymmetry(int one, int two) { add(one, two, 0); }
- inline void addAntiSymmetry(int one, int two) { add(one, two, NegationFlag); }
- inline void addHermiticity(int one, int two) { add(one, two, ConjugationFlag); }
- inline void addAntiHermiticity(int one, int two) { add(one, two, NegationFlag | ConjugationFlag); }
-
- template<typename Op, typename RV, typename Index, std::size_t N, typename... Args>
- inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args) const
- {
- eigen_assert(N >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices.");
- for (std::size_t i = 0; i < size(); i++)
- initial = Op::run(h_permute(i, idx, typename internal::gen_numeric_list<int, N>::type()), m_elements[i].flags, initial, std::forward<Args>(args)...);
- return initial;
- }
-
- template<typename Op, typename RV, typename Index, typename... Args>
- inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args) const
- {
- eigen_assert(idx.size() >= m_numIndices && "Can only apply symmetry group to objects that have at least the required amount of indices.");
- for (std::size_t i = 0; i < size(); i++)
- initial = Op::run(h_permute(i, idx), m_elements[i].flags, initial, std::forward<Args>(args)...);
- return initial;
- }
-
- inline int globalFlags() const { return m_globalFlags; }
- inline std::size_t size() const { return m_elements.size(); }
-
- template<typename Tensor_, typename... IndexTypes>
- inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const
- {
- static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
- return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}});
- }
-
- template<typename Tensor_>
- inline internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const
- {
- return internal::tensor_symmetry_value_setter<Tensor_, DynamicSGroup>(tensor, *this, indices);
- }
- private:
- struct GroupElement {
- std::vector<int> representation;
- int flags;
- bool isId() const
- {
- for (std::size_t i = 0; i < representation.size(); i++)
- if (i != (size_t)representation[i])
- return false;
- return true;
- }
- };
- struct Generator {
- int one;
- int two;
- int flags;
- constexpr inline Generator(int one_, int two_, int flags_) : one(one_), two(two_), flags(flags_) {}
- };
-
- std::size_t m_numIndices;
- std::vector<GroupElement> m_elements;
- std::vector<Generator> m_generators;
- int m_globalFlags;
-
- template<typename Index, std::size_t N, int... n>
- inline std::array<Index, N> h_permute(std::size_t which, const std::array<Index, N>& idx, internal::numeric_list<int, n...>) const
- {
- return std::array<Index, N>{{ idx[n >= m_numIndices ? n : m_elements[which].representation[n]]... }};
- }
-
- template<typename Index>
- inline std::vector<Index> h_permute(std::size_t which, std::vector<Index> idx) const
- {
- std::vector<Index> result;
- result.reserve(idx.size());
- for (auto k : m_elements[which].representation)
- result.push_back(idx[k]);
- for (std::size_t i = m_numIndices; i < idx.size(); i++)
- result.push_back(idx[i]);
- return result;
- }
-
- inline GroupElement ge(Generator const& g) const
- {
- GroupElement result;
- result.representation.reserve(m_numIndices);
- result.flags = g.flags;
- for (std::size_t k = 0; k < m_numIndices; k++) {
- if (k == (std::size_t)g.one)
- result.representation.push_back(g.two);
- else if (k == (std::size_t)g.two)
- result.representation.push_back(g.one);
- else
- result.representation.push_back(int(k));
- }
- return result;
- }
-
- GroupElement mul(GroupElement, GroupElement) const;
- inline GroupElement mul(Generator g1, GroupElement g2) const
- {
- return mul(ge(g1), g2);
- }
-
- inline GroupElement mul(GroupElement g1, Generator g2) const
- {
- return mul(g1, ge(g2));
- }
-
- inline GroupElement mul(Generator g1, Generator g2) const
- {
- return mul(ge(g1), ge(g2));
- }
-
- inline int findElement(GroupElement e) const
- {
- for (auto ee : m_elements) {
- if (ee.representation == e.representation)
- return ee.flags ^ e.flags;
- }
- return -1;
- }
-
- void updateGlobalFlags(int flagDiffOfSameGenerator);
-};
-
-// dynamic symmetry group that auto-adds the template parameters in the constructor
-template<typename... Gen>
-class DynamicSGroupFromTemplateArgs : public DynamicSGroup
-{
- public:
- inline DynamicSGroupFromTemplateArgs() : DynamicSGroup()
- {
- add_all(internal::type_list<Gen...>());
- }
- inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs const& other) : DynamicSGroup(other) { }
- inline DynamicSGroupFromTemplateArgs(DynamicSGroupFromTemplateArgs&& other) : DynamicSGroup(other) { }
- inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(const DynamicSGroupFromTemplateArgs<Gen...>& o) { DynamicSGroup::operator=(o); return *this; }
- inline DynamicSGroupFromTemplateArgs<Gen...>& operator=(DynamicSGroupFromTemplateArgs<Gen...>&& o) { DynamicSGroup::operator=(o); return *this; }
-
- private:
- template<typename Gen1, typename... GenNext>
- inline void add_all(internal::type_list<Gen1, GenNext...>)
- {
- add(Gen1());
- add_all(internal::type_list<GenNext...>());
- }
-
- inline void add_all(internal::type_list<>)
- {
- }
-};
-
-inline DynamicSGroup::GroupElement DynamicSGroup::mul(GroupElement g1, GroupElement g2) const
-{
- eigen_internal_assert(g1.representation.size() == m_numIndices);
- eigen_internal_assert(g2.representation.size() == m_numIndices);
-
- GroupElement result;
- result.representation.reserve(m_numIndices);
- for (std::size_t i = 0; i < m_numIndices; i++) {
- int v = g2.representation[g1.representation[i]];
- eigen_assert(v >= 0);
- result.representation.push_back(v);
- }
- result.flags = g1.flags ^ g2.flags;
- return result;
-}
-
-inline void DynamicSGroup::add(int one, int two, int flags)
-{
- eigen_assert(one >= 0);
- eigen_assert(two >= 0);
- eigen_assert(one != two);
-
- if ((std::size_t)one >= m_numIndices || (std::size_t)two >= m_numIndices) {
- std::size_t newNumIndices = (one > two) ? one : two + 1;
- for (auto& gelem : m_elements) {
- gelem.representation.reserve(newNumIndices);
- for (std::size_t i = m_numIndices; i < newNumIndices; i++)
- gelem.representation.push_back(i);
- }
- m_numIndices = newNumIndices;
- }
-
- Generator g{one, two, flags};
- GroupElement e = ge(g);
-
- /* special case for first generator */
- if (m_elements.size() == 1) {
- while (!e.isId()) {
- m_elements.push_back(e);
- e = mul(e, g);
- }
-
- if (e.flags > 0)
- updateGlobalFlags(e.flags);
-
- // only add in case we didn't have identity
- if (m_elements.size() > 1)
- m_generators.push_back(g);
- return;
- }
-
- int p = findElement(e);
- if (p >= 0) {
- updateGlobalFlags(p);
- return;
- }
-
- std::size_t coset_order = m_elements.size();
- m_elements.push_back(e);
- for (std::size_t i = 1; i < coset_order; i++)
- m_elements.push_back(mul(m_elements[i], e));
- m_generators.push_back(g);
-
- std::size_t coset_rep = coset_order;
- do {
- for (auto g : m_generators) {
- e = mul(m_elements[coset_rep], g);
- p = findElement(e);
- if (p < 0) {
- // element not yet in group
- m_elements.push_back(e);
- for (std::size_t i = 1; i < coset_order; i++)
- m_elements.push_back(mul(m_elements[i], e));
- } else if (p > 0) {
- updateGlobalFlags(p);
- }
- }
- coset_rep += coset_order;
- } while (coset_rep < m_elements.size());
-}
-
-inline void DynamicSGroup::updateGlobalFlags(int flagDiffOfSameGenerator)
-{
- switch (flagDiffOfSameGenerator) {
- case 0:
- default:
- // nothing happened
- break;
- case NegationFlag:
- // every element is it's own negative => whole tensor is zero
- m_globalFlags |= GlobalZeroFlag;
- break;
- case ConjugationFlag:
- // every element is it's own conjugate => whole tensor is real
- m_globalFlags |= GlobalRealFlag;
- break;
- case (NegationFlag | ConjugationFlag):
- // every element is it's own negative conjugate => whole tensor is imaginary
- m_globalFlags |= GlobalImagFlag;
- break;
- /* NOTE:
- * since GlobalZeroFlag == GlobalRealFlag | GlobalImagFlag, if one generator
- * causes the tensor to be real and the next one to be imaginary, this will
- * trivially give the correct result
- */
- }
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_DYNAMICSYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h
deleted file mode 100644
index 942293b..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h
+++ /dev/null
@@ -1,236 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-
-namespace Eigen {
-
-namespace internal {
-
-template<typename list> struct tensor_static_symgroup_permutate;
-
-template<int... nn>
-struct tensor_static_symgroup_permutate<numeric_list<int, nn...>>
-{
- constexpr static std::size_t N = sizeof...(nn);
-
- template<typename T>
- constexpr static inline std::array<T, N> run(const std::array<T, N>& indices)
- {
- return {{indices[nn]...}};
- }
-};
-
-template<typename indices_, int flags_>
-struct tensor_static_symgroup_element
-{
- typedef indices_ indices;
- constexpr static int flags = flags_;
-};
-
-template<typename Gen, int N>
-struct tensor_static_symgroup_element_ctor
-{
- typedef tensor_static_symgroup_element<
- typename gen_numeric_list_swapped_pair<int, N, Gen::One, Gen::Two>::type,
- Gen::Flags
- > type;
-};
-
-template<int N>
-struct tensor_static_symgroup_identity_ctor
-{
- typedef tensor_static_symgroup_element<
- typename gen_numeric_list<int, N>::type,
- 0
- > type;
-};
-
-template<typename iib>
-struct tensor_static_symgroup_multiply_helper
-{
- template<int... iia>
- constexpr static inline numeric_list<int, get<iia, iib>::value...> helper(numeric_list<int, iia...>) {
- return numeric_list<int, get<iia, iib>::value...>();
- }
-};
-
-template<typename A, typename B>
-struct tensor_static_symgroup_multiply
-{
- private:
- typedef typename A::indices iia;
- typedef typename B::indices iib;
- constexpr static int ffa = A::flags;
- constexpr static int ffb = B::flags;
-
- public:
- static_assert(iia::count == iib::count, "Cannot multiply symmetry elements with different number of indices.");
-
- typedef tensor_static_symgroup_element<
- decltype(tensor_static_symgroup_multiply_helper<iib>::helper(iia())),
- ffa ^ ffb
- > type;
-};
-
-template<typename A, typename B>
-struct tensor_static_symgroup_equality
-{
- typedef typename A::indices iia;
- typedef typename B::indices iib;
- constexpr static int ffa = A::flags;
- constexpr static int ffb = B::flags;
- static_assert(iia::count == iib::count, "Cannot compare symmetry elements with different number of indices.");
-
- constexpr static bool value = is_same<iia, iib>::value;
-
- private:
- /* this should be zero if they are identical, or else the tensor
- * will be forced to be pure real, pure imaginary or even pure zero
- */
- constexpr static int flags_cmp_ = ffa ^ ffb;
-
- /* either they are not equal, then we don't care whether the flags
- * match, or they are equal, and then we have to check
- */
- constexpr static bool is_zero = value && flags_cmp_ == NegationFlag;
- constexpr static bool is_real = value && flags_cmp_ == ConjugationFlag;
- constexpr static bool is_imag = value && flags_cmp_ == (NegationFlag | ConjugationFlag);
-
- public:
- constexpr static int global_flags =
- (is_real ? GlobalRealFlag : 0) |
- (is_imag ? GlobalImagFlag : 0) |
- (is_zero ? GlobalZeroFlag : 0);
-};
-
-template<std::size_t NumIndices, typename... Gen>
-struct tensor_static_symgroup
-{
- typedef StaticSGroup<Gen...> type;
- constexpr static std::size_t size = type::static_size;
-};
-
-template<typename Index, std::size_t N, int... ii, int... jj>
-constexpr static inline std::array<Index, N> tensor_static_symgroup_index_permute(std::array<Index, N> idx, internal::numeric_list<int, ii...>, internal::numeric_list<int, jj...>)
-{
- return {{ idx[ii]..., idx[jj]... }};
-}
-
-template<typename Index, int... ii>
-static inline std::vector<Index> tensor_static_symgroup_index_permute(std::vector<Index> idx, internal::numeric_list<int, ii...>)
-{
- std::vector<Index> result{{ idx[ii]... }};
- std::size_t target_size = idx.size();
- for (std::size_t i = result.size(); i < target_size; i++)
- result.push_back(idx[i]);
- return result;
-}
-
-template<typename T> struct tensor_static_symgroup_do_apply;
-
-template<typename first, typename... next>
-struct tensor_static_symgroup_do_apply<internal::type_list<first, next...>>
-{
- template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args>
- static inline RV run(const std::array<Index, NumIndices>& idx, RV initial, Args&&... args)
- {
- static_assert(NumIndices >= SGNumIndices, "Can only apply symmetry group to objects that have at least the required amount of indices.");
- typedef typename internal::gen_numeric_list<int, NumIndices - SGNumIndices, SGNumIndices>::type remaining_indices;
- initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices(), remaining_indices()), first::flags, initial, std::forward<Args>(args)...);
- return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...);
- }
-
- template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args>
- static inline RV run(const std::vector<Index>& idx, RV initial, Args&&... args)
- {
- eigen_assert(idx.size() >= SGNumIndices && "Can only apply symmetry group to objects that have at least the required amount of indices.");
- initial = Op::run(tensor_static_symgroup_index_permute(idx, typename first::indices()), first::flags, initial, std::forward<Args>(args)...);
- return tensor_static_symgroup_do_apply<internal::type_list<next...>>::template run<Op, RV, SGNumIndices>(idx, initial, args...);
- }
-};
-
-template<EIGEN_TPL_PP_SPEC_HACK_DEF(typename, empty)>
-struct tensor_static_symgroup_do_apply<internal::type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>>
-{
- template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, std::size_t NumIndices, typename... Args>
- static inline RV run(const std::array<Index, NumIndices>&, RV initial, Args&&...)
- {
- // do nothing
- return initial;
- }
-
- template<typename Op, typename RV, std::size_t SGNumIndices, typename Index, typename... Args>
- static inline RV run(const std::vector<Index>&, RV initial, Args&&...)
- {
- // do nothing
- return initial;
- }
-};
-
-} // end namespace internal
-
-template<typename... Gen>
-class StaticSGroup
-{
- constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value;
- typedef internal::group_theory::enumerate_group_elements<
- internal::tensor_static_symgroup_multiply,
- internal::tensor_static_symgroup_equality,
- typename internal::tensor_static_symgroup_identity_ctor<NumIndices>::type,
- internal::type_list<typename internal::tensor_static_symgroup_element_ctor<Gen, NumIndices>::type...>
- > group_elements;
- typedef typename group_elements::type ge;
- public:
- constexpr inline StaticSGroup() {}
- constexpr inline StaticSGroup(const StaticSGroup<Gen...>&) {}
- constexpr inline StaticSGroup(StaticSGroup<Gen...>&&) {}
-
- template<typename Op, typename RV, typename Index, std::size_t N, typename... Args>
- static inline RV apply(const std::array<Index, N>& idx, RV initial, Args&&... args)
- {
- return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...);
- }
-
- template<typename Op, typename RV, typename Index, typename... Args>
- static inline RV apply(const std::vector<Index>& idx, RV initial, Args&&... args)
- {
- eigen_assert(idx.size() == NumIndices);
- return internal::tensor_static_symgroup_do_apply<ge>::template run<Op, RV, NumIndices>(idx, initial, args...);
- }
-
- constexpr static std::size_t static_size = ge::count;
-
- constexpr static inline std::size_t size() {
- return ge::count;
- }
- constexpr static inline int globalFlags() { return group_elements::global_flags; }
-
- template<typename Tensor_, typename... IndexTypes>
- inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, typename Tensor_::Index firstIndex, IndexTypes... otherIndices) const
- {
- static_assert(sizeof...(otherIndices) + 1 == Tensor_::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
- return operator()(tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices>{{firstIndex, otherIndices...}});
- }
-
- template<typename Tensor_>
- inline internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>> operator()(Tensor_& tensor, std::array<typename Tensor_::Index, Tensor_::NumIndices> const& indices) const
- {
- return internal::tensor_symmetry_value_setter<Tensor_, StaticSGroup<Gen...>>(tensor, *this, indices);
- }
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_STATICSYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h
deleted file mode 100644
index 879d6cd..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h
+++ /dev/null
@@ -1,338 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-
-namespace Eigen {
-
-enum {
- NegationFlag = 0x01,
- ConjugationFlag = 0x02
-};
-
-enum {
- GlobalRealFlag = 0x01,
- GlobalImagFlag = 0x02,
- GlobalZeroFlag = 0x03
-};
-
-namespace internal {
-
-template<std::size_t NumIndices, typename... Sym> struct tensor_symmetry_pre_analysis;
-template<std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup;
-template<bool instantiate, std::size_t NumIndices, typename... Sym> struct tensor_static_symgroup_if;
-template<typename Tensor_> struct tensor_symmetry_calculate_flags;
-template<typename Tensor_> struct tensor_symmetry_assign_value;
-template<typename... Sym> struct tensor_symmetry_num_indices;
-
-} // end namespace internal
-
-template<int One_, int Two_>
-struct Symmetry
-{
- static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
- constexpr static int One = One_;
- constexpr static int Two = Two_;
- constexpr static int Flags = 0;
-};
-
-template<int One_, int Two_>
-struct AntiSymmetry
-{
- static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
- constexpr static int One = One_;
- constexpr static int Two = Two_;
- constexpr static int Flags = NegationFlag;
-};
-
-template<int One_, int Two_>
-struct Hermiticity
-{
- static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
- constexpr static int One = One_;
- constexpr static int Two = Two_;
- constexpr static int Flags = ConjugationFlag;
-};
-
-template<int One_, int Two_>
-struct AntiHermiticity
-{
- static_assert(One_ != Two_, "Symmetries must cover distinct indices.");
- constexpr static int One = One_;
- constexpr static int Two = Two_;
- constexpr static int Flags = ConjugationFlag | NegationFlag;
-};
-
-/** \class DynamicSGroup
- * \ingroup TensorSymmetry_Module
- *
- * \brief Dynamic symmetry group
- *
- * The %DynamicSGroup class represents a symmetry group that need not be known at
- * compile time. It is useful if one wants to support arbitrary run-time defineable
- * symmetries for tensors, but it is also instantiated if a symmetry group is defined
- * at compile time that would be either too large for the compiler to reasonably
- * generate (using templates to calculate this at compile time is very inefficient)
- * or that the compiler could generate the group but that it wouldn't make sense to
- * unroll the loop for setting coefficients anymore.
- */
-class DynamicSGroup;
-
-/** \internal
- *
- * \class DynamicSGroupFromTemplateArgs
- * \ingroup TensorSymmetry_Module
- *
- * \brief Dynamic symmetry group, initialized from template arguments
- *
- * This class is a child class of DynamicSGroup. It uses the template arguments
- * specified to initialize itself.
- */
-template<typename... Gen>
-class DynamicSGroupFromTemplateArgs;
-
-/** \class StaticSGroup
- * \ingroup TensorSymmetry_Module
- *
- * \brief Static symmetry group
- *
- * This class represents a symmetry group that is known and resolved completely
- * at compile time. Ideally, no run-time penalty is incurred compared to the
- * manual unrolling of the symmetry.
- *
- * <b><i>CAUTION:</i></b>
- *
- * Do not use this class directly for large symmetry groups. The compiler
- * may run into a limit, or segfault or in the very least will take a very,
- * very, very long time to compile the code. Use the SGroup class instead
- * if you want a static group. That class contains logic that will
- * automatically select the DynamicSGroup class instead if the symmetry
- * group becomes too large. (In that case, unrolling may not even be
- * beneficial.)
- */
-template<typename... Gen>
-class StaticSGroup;
-
-/** \class SGroup
- * \ingroup TensorSymmetry_Module
- *
- * \brief Symmetry group, initialized from template arguments
- *
- * This class represents a symmetry group whose generators are already
- * known at compile time. It may or may not be resolved at compile time,
- * depending on the estimated size of the group.
- *
- * \sa StaticSGroup
- * \sa DynamicSGroup
- */
-template<typename... Gen>
-class SGroup : public internal::tensor_symmetry_pre_analysis<internal::tensor_symmetry_num_indices<Gen...>::value, Gen...>::root_type
-{
- public:
- constexpr static std::size_t NumIndices = internal::tensor_symmetry_num_indices<Gen...>::value;
- typedef typename internal::tensor_symmetry_pre_analysis<NumIndices, Gen...>::root_type Base;
-
- // make standard constructors + assignment operators public
- inline SGroup() : Base() { }
- inline SGroup(const SGroup<Gen...>& other) : Base(other) { }
- inline SGroup(SGroup<Gen...>&& other) : Base(other) { }
- inline SGroup<Gen...>& operator=(const SGroup<Gen...>& other) { Base::operator=(other); return *this; }
- inline SGroup<Gen...>& operator=(SGroup<Gen...>&& other) { Base::operator=(other); return *this; }
-
- // all else is defined in the base class
-};
-
-namespace internal {
-
-template<typename... Sym> struct tensor_symmetry_num_indices
-{
- constexpr static std::size_t value = 1;
-};
-
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...>
-{
-private:
- constexpr static std::size_t One = static_cast<std::size_t>(One_);
- constexpr static std::size_t Two = static_cast<std::size_t>(Two_);
- constexpr static std::size_t Three = tensor_symmetry_num_indices<Sym...>::value;
-
- // don't use std::max, since it's not constexpr until C++14...
- constexpr static std::size_t maxOneTwoPlusOne = ((One > Two) ? One : Two) + 1;
-public:
- constexpr static std::size_t value = (maxOneTwoPlusOne > Three) ? maxOneTwoPlusOne : Three;
-};
-
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiSymmetry<One_, Two_>, Sym...>
- : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<Hermiticity<One_, Two_>, Sym...>
- : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-template<int One_, int Two_, typename... Sym> struct tensor_symmetry_num_indices<AntiHermiticity<One_, Two_>, Sym...>
- : public tensor_symmetry_num_indices<Symmetry<One_, Two_>, Sym...> {};
-
-/** \internal
- *
- * \class tensor_symmetry_pre_analysis
- * \ingroup TensorSymmetry_Module
- *
- * \brief Pre-select whether to use a static or dynamic symmetry group
- *
- * When a symmetry group could in principle be determined at compile time,
- * this template implements the logic whether to actually do that or whether
- * to rather defer that to runtime.
- *
- * The logic is as follows:
- * <dl>
- * <dt><b>No generators (trivial symmetry):</b></dt>
- * <dd>Use a trivial static group. Ideally, this has no performance impact
- * compared to not using symmetry at all. In practice, this might not
- * be the case.</dd>
- * <dt><b>More than 4 generators:</b></dt>
- * <dd>Calculate the group at run time, it is likely far too large for the
- * compiler to be able to properly generate it in a realistic time.</dd>
- * <dt><b>Up to and including 4 generators:</b></dt>
- * <dd>Actually enumerate all group elements, but then check how many there
- * are. If there are more than 16, it is unlikely that unrolling the
- * loop (as is done in the static compile-time case) is sensible, so
- * use a dynamic group instead. If there are at most 16 elements, actually
- * use that static group. Note that the largest group with 4 generators
- * still compiles with reasonable resources.</dd>
- * </dl>
- *
- * Note: Example compile time performance with g++-4.6 on an Intenl Core i5-3470
- * with 16 GiB RAM (all generators non-redundant and the subgroups don't
- * factorize):
- *
- * # Generators -O0 -ggdb -O2
- * -------------------------------------------------------------------
- * 1 0.5 s / 250 MiB 0.45s / 230 MiB
- * 2 0.5 s / 260 MiB 0.5 s / 250 MiB
- * 3 0.65s / 310 MiB 0.62s / 310 MiB
- * 4 2.2 s / 860 MiB 1.7 s / 770 MiB
- * 5 130 s / 13000 MiB 120 s / 11000 MiB
- *
- * It is clear that everything is still very efficient up to 4 generators, then
- * the memory and CPU requirements become unreasonable. Thus we only instantiate
- * the template group theory logic if the number of generators supplied is 4 or
- * lower, otherwise this will be forced to be done during runtime, where the
- * algorithm is reasonably fast.
- */
-template<std::size_t NumIndices>
-struct tensor_symmetry_pre_analysis<NumIndices>
-{
- typedef StaticSGroup<> root_type;
-};
-
-template<std::size_t NumIndices, typename Gen_, typename... Gens_>
-struct tensor_symmetry_pre_analysis<NumIndices, Gen_, Gens_...>
-{
- constexpr static std::size_t max_static_generators = 4;
- constexpr static std::size_t max_static_elements = 16;
- typedef tensor_static_symgroup_if<(sizeof...(Gens_) + 1 <= max_static_generators), NumIndices, Gen_, Gens_...> helper;
- constexpr static std::size_t possible_size = helper::size;
-
- typedef typename conditional<
- possible_size == 0 || possible_size >= max_static_elements,
- DynamicSGroupFromTemplateArgs<Gen_, Gens_...>,
- typename helper::type
- >::type root_type;
-};
-
-template<bool instantiate, std::size_t NumIndices, typename... Gens>
-struct tensor_static_symgroup_if
-{
- constexpr static std::size_t size = 0;
- typedef void type;
-};
-
-template<std::size_t NumIndices, typename... Gens>
-struct tensor_static_symgroup_if<true, NumIndices, Gens...> : tensor_static_symgroup<NumIndices, Gens...> {};
-
-template<typename Tensor_>
-struct tensor_symmetry_assign_value
-{
- typedef typename Tensor_::Index Index;
- typedef typename Tensor_::Scalar Scalar;
- constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
- static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transformation_flags, int dummy, Tensor_& tensor, const Scalar& value_)
- {
- Scalar value(value_);
- if (transformation_flags & ConjugationFlag)
- value = numext::conj(value);
- if (transformation_flags & NegationFlag)
- value = -value;
- tensor.coeffRef(transformed_indices) = value;
- return dummy;
- }
-};
-
-template<typename Tensor_>
-struct tensor_symmetry_calculate_flags
-{
- typedef typename Tensor_::Index Index;
- constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
- static inline int run(const std::array<Index, NumIndices>& transformed_indices, int transform_flags, int current_flags, const std::array<Index, NumIndices>& orig_indices)
- {
- if (transformed_indices == orig_indices) {
- if (transform_flags & (ConjugationFlag | NegationFlag))
- return current_flags | GlobalImagFlag; // anti-hermitian diagonal
- else if (transform_flags & ConjugationFlag)
- return current_flags | GlobalRealFlag; // hermitian diagonal
- else if (transform_flags & NegationFlag)
- return current_flags | GlobalZeroFlag; // anti-symmetric diagonal
- }
- return current_flags;
- }
-};
-
-template<typename Tensor_, typename Symmetry_, int Flags = 0>
-class tensor_symmetry_value_setter
-{
- public:
- typedef typename Tensor_::Index Index;
- typedef typename Tensor_::Scalar Scalar;
- constexpr static std::size_t NumIndices = Tensor_::NumIndices;
-
- inline tensor_symmetry_value_setter(Tensor_& tensor, Symmetry_ const& symmetry, std::array<Index, NumIndices> const& indices)
- : m_tensor(tensor), m_symmetry(symmetry), m_indices(indices) { }
-
- inline tensor_symmetry_value_setter<Tensor_, Symmetry_, Flags>& operator=(Scalar const& value)
- {
- doAssign(value);
- return *this;
- }
- private:
- Tensor_& m_tensor;
- Symmetry_ m_symmetry;
- std::array<Index, NumIndices> m_indices;
-
- inline void doAssign(Scalar const& value)
- {
- #ifdef EIGEN_TENSOR_SYMMETRY_CHECK_VALUES
- int value_flags = m_symmetry.template apply<internal::tensor_symmetry_calculate_flags<Tensor_>, int>(m_indices, m_symmetry.globalFlags(), m_indices);
- if (value_flags & GlobalRealFlag)
- eigen_assert(numext::imag(value) == 0);
- if (value_flags & GlobalImagFlag)
- eigen_assert(numext::real(value) == 0);
- #endif
- m_symmetry.template apply<internal::tensor_symmetry_assign_value<Tensor_>, int>(m_indices, 0, m_tensor, value);
- }
-};
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_SYMMETRY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h b/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h
deleted file mode 100644
index 5e97d07..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h
+++ /dev/null
@@ -1,669 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-#define EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-
-namespace Eigen {
-
-namespace internal {
-
-namespace group_theory {
-
-/** \internal
- * \file CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h
- * This file contains C++ templates that implement group theory algorithms.
- *
- * The algorithms allow for a compile-time analysis of finite groups.
- *
- * Currently only Dimino's algorithm is implemented, which returns a list
- * of all elements in a group given a set of (possibly redundant) generators.
- * (One could also do that with the so-called orbital algorithm, but that
- * is much more expensive and usually has no advantages.)
- */
-
-/**********************************************************************
- * "Ok kid, here is where it gets complicated."
- * - Amelia Pond in the "Doctor Who" episode
- * "The Big Bang"
- *
- * Dimino's algorithm
- * ==================
- *
- * The following is Dimino's algorithm in sequential form:
- *
- * Input: identity element, list of generators, equality check,
- * multiplication operation
- * Output: list of group elements
- *
- * 1. add identity element
- * 2. remove identities from list of generators
- * 3. add all powers of first generator that aren't the
- * identity element
- * 4. go through all remaining generators:
- * a. if generator is already in the list of elements
- * -> do nothing
- * b. otherwise
- * i. remember current # of elements
- * (i.e. the size of the current subgroup)
- * ii. add all current elements (which includes
- * the identity) each multiplied from right
- * with the current generator to the group
- * iii. add all remaining cosets that are generated
- * by products of the new generator with itself
- * and all other generators seen so far
- *
- * In functional form, this is implemented as a long set of recursive
- * templates that have a complicated relationship.
- *
- * The main interface for Dimino's algorithm is the template
- * enumerate_group_elements. All lists are implemented as variadic
- * type_list<typename...> and numeric_list<typename = int, int...>
- * templates.
- *
- * 'Calling' templates is usually done via typedefs.
- *
- * This algorithm is an extended version of the basic version. The
- * extension consists in the fact that each group element has a set
- * of flags associated with it. Multiplication of two group elements
- * with each other results in a group element whose flags are the
- * XOR of the flags of the previous elements. Each time the algorithm
- * notices that a group element it just calculated is already in the
- * list of current elements, the flags of both will be compared and
- * added to the so-called 'global flags' of the group.
- *
- * The rationale behind this extension is that this allows not only
- * for the description of symmetries between tensor indices, but
- * also allows for the description of hermiticity, antisymmetry and
- * antihermiticity. Negation and conjugation are each a specific bit
- * in the flags value, and if two different ways to reach a group
- * element lead to two different flags, this poses a constraint on
- * the allowed values of the resulting tensor. For example, if a
- * group element is reached both with and without the conjugation
- * flag, it is clear that the resulting tensor has to be real.
- *
- * Note that this flag mechanism is quite generic and may have other
- * uses beyond tensor properties.
- *
- * IMPORTANT:
- * This algorithm assumes the group to be finite. If you try to
- * run it with a group that's infinite, the algorithm will only
- * terminate once you hit a compiler limit (max template depth).
- * Also note that trying to use this implementation to create a
- * very large group will probably either make you hit the same
- * limit, cause the compiler to segfault or at the very least
- * take a *really* long time (hours, days, weeks - sic!) to
- * compile. It is not recommended to plug in more than 4
- * generators, unless they are independent of each other.
- */
-
-/** \internal
- *
- * \class strip_identities
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Cleanse a list of group elements of the identity element
- *
- * This template is used to make a first pass through all initial
- * generators of Dimino's algorithm and remove the identity
- * elements.
- *
- * \sa enumerate_group_elements
- */
-template<template<typename, typename> class Equality, typename id, typename L> struct strip_identities; // primary template is only declared; the type_list specializations below define it
-
-template<
- template<typename, typename> class Equality,
- typename id,
- typename t,
- typename... ts
->
-struct strip_identities<Equality, id, type_list<t, ts...>>
-{
- typedef typename conditional<
- Equality<id, t>::value, // does the head element t compare equal to the identity?
- typename strip_identities<Equality, id, type_list<ts...>>::type, // yes: drop t, keep only the stripped tail
- typename concat<type_list<t>, typename strip_identities<Equality, id, type_list<ts...>>::type>::type // no: keep t in front of the stripped tail
- >::type type;
- constexpr static int global_flags = Equality<id, t>::global_flags | strip_identities<Equality, id, type_list<ts...>>::global_flags; // OR of the head comparison's flags with the tail's flags
-};
-
-template<
- template<typename, typename> class Equality,
- typename id
- EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, ts) // NOTE(review): macro defined outside this file; presumably a compiler workaround for the empty-list case -- confirm
->
-struct strip_identities<Equality, id, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(ts)>>
-{
- typedef type_list<> type; // recursion end: nothing left to keep
- constexpr static int global_flags = 0; // no comparison performed here, so no flags
-};
-
-/** \internal
- *
- * \class dimino_first_step_elements_helper
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Recursive template that adds powers of the first generator to the list of group elements
- *
- * This template calls itself recursively to add powers of the first
- * generator to the list of group elements. It stops if it reaches
- * the identity element again.
- *
- * \sa enumerate_group_elements, dimino_first_step_elements
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename g,
- typename current_element,
- typename elements,
- bool dont_add_current_element // = false
->
-struct dimino_first_step_elements_helper
-#ifndef EIGEN_PARSED_BY_DOXYGEN
- : // recursive inheritance is too difficult for Doxygen
- public dimino_first_step_elements_helper<
- Multiply,
- Equality,
- id,
- g,
- typename Multiply<current_element, g>::type, // next power: current_element * g
- typename concat<elements, type_list<current_element>>::type, // append current_element to the accumulated list
- Equality<typename Multiply<current_element, g>::type, id>::value // stop flag for the next step: true once the next power equals id
- > {};
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename g,
- typename current_element,
- typename elements
->
-struct dimino_first_step_elements_helper<Multiply, Equality, id, g, current_element, elements, true> // terminal case: current_element is not appended
-#endif // EIGEN_PARSED_BY_DOXYGEN
-{
- typedef elements type; // the list accumulated by the recursive steps
- constexpr static int global_flags = Equality<current_element, id>::global_flags; // flags reported by comparing the final power against id
-};
-
-/** \internal
- *
- * \class dimino_first_step_elements
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Add all powers of the first generator to the list of group elements
- *
- * This template takes the first non-identity generator and generates the initial
- * list of elements which consists of all powers of that generator. For a group
- * with just one generator, the group is fully enumerated after this step.
- *
- * \sa enumerate_group_elements
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename generators
->
-struct dimino_first_step_elements
-{
- typedef typename get<0, generators>::type first_generator; // head of the generator list
- typedef typename skip<1, generators>::type next_generators; // generators left over for the later steps
- typedef type_list<first_generator> generators_done; // generators consumed so far: just the first one
-
- typedef dimino_first_step_elements_helper<
- Multiply,
- Equality,
- id,
- first_generator, // g: the generator whose powers are enumerated
- first_generator, // current_element: start at the first power of g
- type_list<id>, // elements: start with the identity only
- false // dont_add_current_element: the first power is always added
- > helper;
- typedef typename helper::type type;
- constexpr static int global_flags = helper::global_flags;
-};
-
-/** \internal
- *
- * \class dimino_get_coset_elements
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Generate all elements of a specific coset
- *
- * This template generates all the elements of a specific coset by
- * multiplying all elements in the given subgroup with the new
- * coset representative. Note that the first element of the
- * subgroup is always the identity element, so the first element of
- * the result of this template is going to be the coset
- * representative itself.
- *
- * Note that this template accepts an additional boolean parameter
- * that specifies whether to actually generate the coset (true) or
- * just return an empty list (false).
- *
- * \sa enumerate_group_elements, dimino_add_cosets_for_rep
- */
-template<
- template<typename, typename> class Multiply,
- typename sub_group_elements,
- typename new_coset_rep,
- bool generate_coset // = true
->
-struct dimino_get_coset_elements
-{
- typedef typename apply_op_from_right<Multiply, new_coset_rep, sub_group_elements>::type type; // NOTE(review): apply_op_from_right is defined elsewhere; presumably combines each subgroup element with new_coset_rep via Multiply -- confirm operand order
-};
-
-template<
- template<typename, typename> class Multiply,
- typename sub_group_elements,
- typename new_coset_rep
->
-struct dimino_get_coset_elements<Multiply, sub_group_elements, new_coset_rep, false> // generate_coset == false
-{
- typedef type_list<> type; // nothing to add
-};
-
-/** \internal
- *
- * \class dimino_add_cosets_for_rep
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Recursive template for adding coset spaces
- *
- * This template multiplies the coset representative with a generator
- * from the list of previous generators. If the new element is not in
- * the group already, it adds the corresponding coset. Finally it
- * proceeds to call itself with the next generator from the list.
- *
- * \sa enumerate_group_elements, dimino_add_all_coset_spaces
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename sub_group_elements,
- typename elements,
- typename generators,
- typename rep_element,
- int sub_group_size
->
-struct dimino_add_cosets_for_rep; // primary template is only declared; the type_list specializations below define it
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename sub_group_elements,
- typename elements,
- typename g,
- typename... gs,
- typename rep_element,
- int sub_group_size
->
-struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<g, gs...>, rep_element, sub_group_size>
-{
- typedef typename Multiply<rep_element, g>::type new_coset_rep; // candidate representative: rep_element * g
- typedef contained_in_list_gf<Equality, new_coset_rep, elements> _cil; // membership test against the current list; also yields global flags
- constexpr static bool add_coset = !_cil::value; // only add a coset when the candidate is new
-
- typedef typename dimino_get_coset_elements<
- Multiply,
- sub_group_elements,
- new_coset_rep,
- add_coset // false selects the specialization that yields an empty type_list
- >::type coset_elements;
-
- typedef dimino_add_cosets_for_rep<
- Multiply,
- Equality,
- id,
- sub_group_elements,
- typename concat<elements, coset_elements>::type, // list extended by the (possibly empty) coset
- type_list<gs...>, // recurse over the remaining generators
- rep_element,
- sub_group_size
- > _helper;
-
- typedef typename _helper::type type;
- constexpr static int global_flags = _cil::global_flags | _helper::global_flags; // flags from this membership test OR'ed with those of the remaining generators
-
- /* Note that we don't have to update global flags here, since
- * we will only add these elements if they are not part of
- * the group already. But that only happens if the coset rep
- * is not already in the group, so the check for the coset rep
- * will catch this.
- */
-};
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename sub_group_elements,
- typename elements
- EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty), // NOTE(review): macro defined outside this file; presumably a compiler workaround for the empty-list case -- confirm
- typename rep_element,
- int sub_group_size
->
-struct dimino_add_cosets_for_rep<Multiply, Equality, id, sub_group_elements, elements, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, rep_element, sub_group_size>
-{
- typedef elements type; // no generators left: return the list unchanged
- constexpr static int global_flags = 0;
-};
-
-/** \internal
- *
- * \class dimino_add_all_coset_spaces
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Recursive template for adding all coset spaces for a new generator
- *
- * This template tries to go through the list of generators (with
- * the help of the dimino_add_cosets_for_rep template) as long as
- * it still finds elements that are not part of the group and add
- * the corresponding cosets.
- *
- * \sa enumerate_group_elements, dimino_add_cosets_for_rep
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename sub_group_elements,
- typename elements,
- typename generators,
- int sub_group_size,
- int rep_pos,
- bool stop_condition // = false
->
-struct dimino_add_all_coset_spaces
-{
- typedef typename get<rep_pos, elements>::type rep_element; // element at rep_pos is the coset representative for this step
- typedef dimino_add_cosets_for_rep<
- Multiply,
- Equality,
- id,
- sub_group_elements,
- elements,
- generators,
- rep_element,
- sub_group_elements::count // supplied as sub_group_size; the sub_group_size parameter itself is only forwarded to the recursive step below
- > _ac4r;
- typedef typename _ac4r::type new_elements; // list after trying every generator on this representative
-
- constexpr static int new_rep_pos = rep_pos + sub_group_elements::count; // advance by one subgroup-sized stride
- constexpr static bool new_stop_condition = new_rep_pos >= new_elements::count; // stop once the next position falls outside the grown list
-
- typedef dimino_add_all_coset_spaces<
- Multiply,
- Equality,
- id,
- sub_group_elements,
- new_elements,
- generators,
- sub_group_size,
- new_rep_pos,
- new_stop_condition
- > _helper;
-
- typedef typename _helper::type type;
- constexpr static int global_flags = _helper::global_flags | _ac4r::global_flags; // flags of this step OR'ed with those of all later steps
-};
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename sub_group_elements,
- typename elements,
- typename generators,
- int sub_group_size,
- int rep_pos
->
-struct dimino_add_all_coset_spaces<Multiply, Equality, id, sub_group_elements, elements, generators, sub_group_size, rep_pos, true> // stop_condition == true
-{
- typedef elements type; // recursion end: the list is returned unchanged
- constexpr static int global_flags = 0;
-};
-
-/** \internal
- *
- * \class dimino_add_generator
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Enlarge the group by adding a new generator.
- *
- * It accepts a boolean parameter that determines if the generator is redundant,
- * i.e. was already seen in the group. In that case, it reduces to a no-op.
- *
- * \sa enumerate_group_elements, dimino_add_all_coset_spaces
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename elements,
- typename generators_done,
- typename current_generator,
- bool redundant // = false
->
-struct dimino_add_generator
-{
- /* this template is only called if the generator is not redundant
- * => all elements of the group multiplied with the new generator
- * are going to be new elements of the most trivial coset space
- */
- typedef typename apply_op_from_right<Multiply, current_generator, elements>::type multiplied_elements;
- typedef typename concat<elements, multiplied_elements>::type new_elements; // old subgroup followed by its first new coset
-
- constexpr static int rep_pos = elements::count; // index of the first newly added element
-
- typedef dimino_add_all_coset_spaces<
- Multiply,
- Equality,
- id,
- elements, // elements of previous subgroup
- new_elements,
- typename concat<generators_done, type_list<current_generator>>::type, // all generators seen so far, including the one being added
- elements::count, // size of previous subgroup
- rep_pos,
- false // don't stop (because rep_pos >= new_elements::count is always false at this point)
- > _helper;
- typedef typename _helper::type type;
- constexpr static int global_flags = _helper::global_flags;
-};
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename elements,
- typename generators_done,
- typename current_generator
->
-struct dimino_add_generator<Multiply, Equality, id, elements, generators_done, current_generator, true>
-{
- // redundant case
- typedef elements type; // element list is returned unchanged
- constexpr static int global_flags = 0; // this specialization performs no comparisons, so it contributes no flags
-};
-
-/** \internal
- *
- * \class dimino_add_remaining_generators
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Recursive template that adds all remaining generators to a group
- *
- * Loop through the list of generators that remain and successively
- * add them to the group.
- *
- * \sa enumerate_group_elements, dimino_add_generator
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename generators_done,
- typename remaining_generators,
- typename elements
->
-struct dimino_add_remaining_generators
-{
- typedef typename get<0, remaining_generators>::type first_generator; // generator handled in this step
- typedef typename skip<1, remaining_generators>::type next_generators; // generators left for the following steps
-
- typedef contained_in_list_gf<Equality, first_generator, elements> _cil; // is the generator already an element of the group built so far?
-
- typedef dimino_add_generator<
- Multiply,
- Equality,
- id,
- elements,
- generators_done,
- first_generator,
- _cil::value // true selects the redundant (no-op) specialization
- > _helper;
-
- typedef typename _helper::type new_elements;
-
- typedef dimino_add_remaining_generators<
- Multiply,
- Equality,
- id,
- typename concat<generators_done, type_list<first_generator>>::type, // first_generator is recorded as done even when it was redundant
- next_generators,
- new_elements
- > _next_iter;
-
- typedef typename _next_iter::type type;
- constexpr static int global_flags =
- _cil::global_flags |
- _helper::global_flags |
- _next_iter::global_flags;
-};
-
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename generators_done,
- typename elements
->
-struct dimino_add_remaining_generators<Multiply, Equality, id, generators_done, type_list<>, elements> // remaining_generators is empty
-{
- typedef elements type; // recursion end: the list is returned unchanged
- constexpr static int global_flags = 0;
-};
-
-/** \internal
- *
- * \class enumerate_group_elements_noid
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Helper template that implements group element enumeration
- *
- * This is a helper template that implements the actual enumeration
- * of group elements. This has been split so that the list of
- * generators can be cleansed of the identity element before
- * performing the actual operation.
- *
- * \sa enumerate_group_elements
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename generators,
- int initial_global_flags = 0
->
-struct enumerate_group_elements_noid
-{
- typedef dimino_first_step_elements<Multiply, Equality, id, generators> first_step; // identity plus all powers of the first generator
- typedef typename first_step::type first_step_elements; // alias is not referenced below; _helper uses first_step::type directly
-
- typedef dimino_add_remaining_generators<
- Multiply,
- Equality,
- id,
- typename first_step::generators_done,
- typename first_step::next_generators, // remaining_generators
- typename first_step::type // first_step elements
- > _helper;
-
- typedef typename _helper::type type;
- constexpr static int global_flags =
- initial_global_flags |
- first_step::global_flags |
- _helper::global_flags;
-};
-
-// in case when no generators are specified
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- int initial_global_flags
->
-struct enumerate_group_elements_noid<Multiply, Equality, id, type_list<>, initial_global_flags>
-{
- typedef type_list<id> type; // with no generators the result is the identity alone
- constexpr static int global_flags = initial_global_flags; // only the flags handed in by the caller
-};
-
-/** \internal
- *
- * \class enumerate_group_elements
- * \ingroup CXX11_TensorSymmetry_Module
- *
- * \brief Enumerate all elements in a finite group
- *
- * This template enumerates all elements in a finite group. It accepts
- * the following template parameters:
- *
- * \tparam Multiply The multiplication operation that multiplies two group elements
- * with each other.
- * \tparam Equality The equality check operation that checks if two group elements
- * are equal to another.
- * \tparam id The identity element
- * \tparam _generators A list of (possibly redundant) generators of the group
- */
-template<
- template<typename, typename> class Multiply,
- template<typename, typename> class Equality,
- typename id,
- typename _generators
->
-struct enumerate_group_elements
- : public enumerate_group_elements_noid<
- Multiply,
- Equality,
- id,
- typename strip_identities<Equality, id, _generators>::type, // generator list with identity elements removed
- strip_identities<Equality, id, _generators>::global_flags // flags collected while stripping seed the global flags
- >
-{
-};
-
-} // end namespace group_theory
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSORSYMMETRY_TEMPLATEGROUPTHEORY_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
deleted file mode 100644
index 71d5555..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
+++ /dev/null
@@ -1,233 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
-#define EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
-
-namespace Eigen {
-
-// EventCount allows waiting for arbitrary predicates in non-blocking
-// algorithms. Think of a condition variable, but the wait predicate does not
-// need to be protected by a mutex. Usage:
-// Waiting thread does:
-//
-// if (predicate)
-// return act();
-// EventCount::Waiter& w = waiters[my_index];
-// ec.Prewait(&w);
-// if (predicate) {
-// ec.CancelWait(&w);
-// return act();
-// }
-// ec.CommitWait(&w);
-//
-// Notifying thread does:
-//
-// predicate = true;
-// ec.Notify(true);
-//
-// Notify is cheap if there are no waiting threads. Prewait/CommitWait are not
-// cheap, but they are executed only if the preceding predicate check has
-// failed.
-//
-// Algorithm outline:
-// There are two main variables: predicate (managed by user) and state_.
-// Operation closely resembles Dekker's mutual exclusion algorithm:
-// https://en.wikipedia.org/wiki/Dekker%27s_algorithm
-// Waiting thread sets state_ then checks predicate, Notifying thread sets
-// predicate then checks state_. Due to seq_cst fences in between these
-// operations it is guaranteed that either waiter will see predicate change
-// and won't block, or notifying thread will see state_ change and will unblock
-// the waiter, or both. But it can't happen that both threads don't see each
-// other's changes, which would lead to deadlock.
-class EventCount {
- public:
- class Waiter;
-
- EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) {
- eigen_assert(waiters.size() < (1 << kWaiterBits) - 1); // bound is written with kWaiterBits; kStackBits has the same value (16)
- // Initialize epoch to something close to overflow to test overflow.
- state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2); // kStackMask in the low bits encodes an empty waiter stack
- }
-
- ~EventCount() {
- // Ensure there are no waiters.
- eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask);
- }
-
- // Prewait prepares for waiting.
- // After calling this function the thread must re-check the wait predicate
- // and call either CancelWait or CommitWait passing the same Waiter object.
- void Prewait(Waiter* w) {
- w->epoch = state_.fetch_add(kWaiterInc, std::memory_order_relaxed); // stores the state as it was before this waiter was counted
- std::atomic_thread_fence(std::memory_order_seq_cst);
- }
-
- // CommitWait commits waiting.
- void CommitWait(Waiter* w) {
- w->state = Waiter::kNotSignaled;
- // Modification epoch of this waiter.
- uint64_t epoch =
- (w->epoch & kEpochMask) + // epoch bits of the Prewait snapshot
- (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); // plus one epoch step per waiter already in prewait at that time
- uint64_t state = state_.load(std::memory_order_seq_cst);
- for (;;) {
- if (int64_t((state & kEpochMask) - epoch) < 0) {
- // The preceding waiter has not decided on its fate. Wait until it
- // calls either CancelWait or CommitWait, or is notified.
- EIGEN_THREAD_YIELD();
- state = state_.load(std::memory_order_seq_cst);
- continue;
- }
- // We've already been notified.
- if (int64_t((state & kEpochMask) - epoch) > 0) return;
- // Remove this thread from prewait counter and add it to the waiter list.
- eigen_assert((state & kWaiterMask) != 0);
- uint64_t newstate = state - kWaiterInc + kEpochInc;
- newstate = (newstate & ~kStackMask) | (w - &waiters_[0]); // low bits now hold this waiter's index in waiters_
- if ((state & kStackMask) == kStackMask)
- w->next.store(nullptr, std::memory_order_relaxed); // stack was empty: no successor
- else
- w->next.store(&waiters_[state & kStackMask], std::memory_order_relaxed); // link to the previous top of the stack
- if (state_.compare_exchange_weak(state, newstate,
- std::memory_order_release))
- break;
- }
- Park(w);
- }
-
- // CancelWait cancels effects of the previous Prewait call.
- void CancelWait(Waiter* w) {
- uint64_t epoch =
- (w->epoch & kEpochMask) +
- (((w->epoch & kWaiterMask) >> kWaiterShift) << kEpochShift); // same epoch computation as in CommitWait
- uint64_t state = state_.load(std::memory_order_relaxed);
- for (;;) {
- if (int64_t((state & kEpochMask) - epoch) < 0) {
- // The preceding waiter has not decided on its fate. Wait until it
- // calls either CancelWait or CommitWait, or is notified.
- EIGEN_THREAD_YIELD();
- state = state_.load(std::memory_order_relaxed);
- continue;
- }
- // We've already been notified.
- if (int64_t((state & kEpochMask) - epoch) > 0) return;
- // Remove this thread from prewait counter.
- eigen_assert((state & kWaiterMask) != 0);
- if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc,
- std::memory_order_relaxed))
- return;
- }
- }
-
- // Notify wakes one or all waiting threads.
- // Must be called after changing the associated wait predicate.
- void Notify(bool all) {
- std::atomic_thread_fence(std::memory_order_seq_cst);
- uint64_t state = state_.load(std::memory_order_acquire);
- for (;;) {
- // Easy case: no waiters.
- if ((state & kStackMask) == kStackMask && (state & kWaiterMask) == 0)
- return;
- uint64_t waiters = (state & kWaiterMask) >> kWaiterShift; // number of threads currently in prewait
- uint64_t newstate;
- if (all) {
- // Reset prewait counter and empty wait list.
- newstate = (state & kEpochMask) + (kEpochInc * waiters) + kStackMask;
- } else if (waiters) {
- // There is a thread in pre-wait state, unblock it.
- newstate = state + kEpochInc - kWaiterInc;
- } else {
- // Pop a waiter from list and unpark it.
- Waiter* w = &waiters_[state & kStackMask];
- Waiter* wnext = w->next.load(std::memory_order_relaxed);
- uint64_t next = kStackMask;
- if (wnext != nullptr) next = wnext - &waiters_[0];
- // Note: we don't add kEpochInc here. ABA problem on the lock-free stack
- // can't happen because a waiter is re-pushed onto the stack only after
- // it was in the pre-wait state which inevitably leads to epoch
- // increment.
- newstate = (state & kEpochMask) + next;
- }
- if (state_.compare_exchange_weak(state, newstate,
- std::memory_order_acquire)) {
- if (!all && waiters) return; // unblocked pre-wait thread
- if ((state & kStackMask) == kStackMask) return; // stack was empty: nobody to unpark
- Waiter* w = &waiters_[state & kStackMask];
- if (!all) w->next.store(nullptr, std::memory_order_relaxed); // single wake-up: cut the link so Unpark signals only w
- Unpark(w);
- return;
- }
- }
- }
-
- class Waiter {
- friend class EventCount;
- // Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector.
- EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next;
- std::mutex mu;
- std::condition_variable cv;
- uint64_t epoch; // written by Prewait, read by CommitWait/CancelWait
- unsigned state; // one of the enum values below; set in CommitWait, Park and Unpark
- enum {
- kNotSignaled,
- kWaiting,
- kSignaled,
- };
- };
-
- private:
- // State_ layout:
- // - low kStackBits is a stack of waiters committed wait.
- // - next kWaiterBits is count of waiters in prewait state.
- // - next kEpochBits is modification counter.
- static const uint64_t kStackBits = 16;
- static const uint64_t kStackMask = (1ull << kStackBits) - 1;
- static const uint64_t kWaiterBits = 16;
- static const uint64_t kWaiterShift = 16;
- static const uint64_t kWaiterMask = ((1ull << kWaiterBits) - 1)
- << kWaiterShift;
- static const uint64_t kWaiterInc = 1ull << kWaiterBits; // shift amount is written as kWaiterBits, which equals kWaiterShift (16)
- static const uint64_t kEpochBits = 32;
- static const uint64_t kEpochShift = 32;
- static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift;
- static const uint64_t kEpochInc = 1ull << kEpochShift;
- std::atomic<uint64_t> state_;
- MaxSizeVector<Waiter>& waiters_; // held by reference; the vector is owned by the caller
-
- void Park(Waiter* w) {
- std::unique_lock<std::mutex> lock(w->mu);
- while (w->state != Waiter::kSignaled) { // re-checked after every wake-up until Unpark has set kSignaled
- w->state = Waiter::kWaiting;
- w->cv.wait(lock);
- }
- }
-
- void Unpark(Waiter* waiters) {
- Waiter* next = nullptr;
- for (Waiter* w = waiters; w; w = next) {
- next = w->next.load(std::memory_order_relaxed); // the link is read before w is signaled
- unsigned state;
- {
- std::unique_lock<std::mutex> lock(w->mu);
- state = w->state;
- w->state = Waiter::kSignaled;
- }
- // Avoid notifying if it wasn't waiting.
- if (state == Waiter::kWaiting) w->cv.notify_one();
- }
- }
-
- EventCount(const EventCount&) = delete;
- void operator=(const EventCount&) = delete;
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_EVENTCOUNT_H_
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
deleted file mode 100644
index 354bce5..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ /dev/null
@@ -1,274 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
-#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
-
-
-namespace Eigen {
-
-template <typename Environment>
-class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
- public:
- typedef typename Environment::Task Task;
- typedef RunQueue<Task, 1024> Queue;
-
- NonBlockingThreadPoolTempl(int num_threads, Environment env = Environment())
- : env_(env),
- threads_(num_threads),
- queues_(num_threads),
- coprimes_(num_threads),
- waiters_(num_threads),
- blocked_(0),
- spinning_(0),
- done_(false),
- ec_(waiters_) {
- waiters_.resize(num_threads);
-
- // Calculate coprimes of num_threads.
- // Coprimes are used for a random walk over all threads in Steal
- // and NonEmptyQueueIndex. Iteration is based on the fact that if we take
- // a walk starting thread index t and calculate num_threads - 1 subsequent
- // indices as (t + coprime) % num_threads, we will cover all threads without
- // repetitions (effectively getting a presudo-random permutation of thread
- // indices).
- for (int i = 1; i <= num_threads; i++) {
- unsigned a = i;
- unsigned b = num_threads;
- // If GCD(a, b) == 1, then a and b are coprimes.
- while (b != 0) {
- unsigned tmp = a;
- a = b;
- b = tmp % b;
- }
- if (a == 1) {
- coprimes_.push_back(i);
- }
- }
- for (int i = 0; i < num_threads; i++) {
- queues_.push_back(new Queue());
- }
- for (int i = 0; i < num_threads; i++) {
- threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
- }
- }
-
- ~NonBlockingThreadPoolTempl() {
- done_ = true;
- // Now if all threads block without work, they will start exiting.
- // But note that threads can continue to work arbitrary long,
- // block, submit new work, unblock and otherwise live full life.
- ec_.Notify(true);
-
- // Join threads explicitly to avoid destruction order issues.
- for (size_t i = 0; i < threads_.size(); i++) delete threads_[i];
- for (size_t i = 0; i < threads_.size(); i++) delete queues_[i];
- }
-
- void Schedule(std::function<void()> fn) {
- Task t = env_.CreateTask(std::move(fn));
- PerThread* pt = GetPerThread();
- if (pt->pool == this) {
- // Worker thread of this pool, push onto the thread's queue.
- Queue* q = queues_[pt->thread_id];
- t = q->PushFront(std::move(t));
- } else {
- // A free-standing thread (or worker of another pool), push onto a random
- // queue.
- Queue* q = queues_[Rand(&pt->rand) % queues_.size()];
- t = q->PushBack(std::move(t));
- }
- // Note: below we touch this after making w available to worker threads.
- // Strictly speaking, this can lead to a racy-use-after-free. Consider that
- // Schedule is called from a thread that is neither main thread nor a worker
- // thread of this pool. Then, execution of w directly or indirectly
- // completes overall computations, which in turn leads to destruction of
- // this. We expect that such scenario is prevented by program, that is,
- // this is kept alive while any threads can potentially be in Schedule.
- if (!t.f)
- ec_.Notify(false);
- else
- env_.ExecuteTask(t); // Push failed, execute directly.
- }
-
- int NumThreads() const final {
- return static_cast<int>(threads_.size());
- }
-
- int CurrentThreadId() const final {
- const PerThread* pt =
- const_cast<NonBlockingThreadPoolTempl*>(this)->GetPerThread();
- if (pt->pool == this) {
- return pt->thread_id;
- } else {
- return -1;
- }
- }
-
- private:
- typedef typename Environment::EnvThread Thread;
-
- struct PerThread {
- constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { }
- NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads.
- uint64_t rand; // Random generator state.
- int thread_id; // Worker thread index in pool.
- };
-
- Environment env_;
- MaxSizeVector<Thread*> threads_;
- MaxSizeVector<Queue*> queues_;
- MaxSizeVector<unsigned> coprimes_;
- MaxSizeVector<EventCount::Waiter> waiters_;
- std::atomic<unsigned> blocked_;
- std::atomic<bool> spinning_;
- std::atomic<bool> done_;
- EventCount ec_;
-
- // Main worker thread loop.
- void WorkerLoop(int thread_id) {
- PerThread* pt = GetPerThread();
- pt->pool = this;
- pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id());
- pt->thread_id = thread_id;
- Queue* q = queues_[thread_id];
- EventCount::Waiter* waiter = &waiters_[thread_id];
- for (;;) {
- Task t = q->PopFront();
- if (!t.f) {
- t = Steal();
- if (!t.f) {
- // Leave one thread spinning. This reduces latency.
- // TODO(dvyukov): 1000 iterations is based on fair dice roll, tune it.
- // Also, the time it takes to attempt to steal work 1000 times depends
- // on the size of the thread pool. However the speed at which the user
- // of the thread pool submit tasks is independent of the size of the
- // pool. Consider a time based limit instead.
- if (!spinning_ && !spinning_.exchange(true)) {
- for (int i = 0; i < 1000 && !t.f; i++) {
- t = Steal();
- }
- spinning_ = false;
- }
- if (!t.f) {
- if (!WaitForWork(waiter, &t)) {
- return;
- }
- }
- }
- }
- if (t.f) {
- env_.ExecuteTask(t);
- }
- }
- }
-
- // Steal tries to steal work from other worker threads in best-effort manner.
- Task Steal() {
- PerThread* pt = GetPerThread();
- const size_t size = queues_.size();
- unsigned r = Rand(&pt->rand);
- unsigned inc = coprimes_[r % coprimes_.size()];
- unsigned victim = r % size;
- for (unsigned i = 0; i < size; i++) {
- Task t = queues_[victim]->PopBack();
- if (t.f) {
- return t;
- }
- victim += inc;
- if (victim >= size) {
- victim -= size;
- }
- }
- return Task();
- }
-
- // WaitForWork blocks until new work is available (returns true), or if it is
- // time to exit (returns false). Can optionally return a task to execute in t
- // (in such case t.f != nullptr on return).
- bool WaitForWork(EventCount::Waiter* waiter, Task* t) {
- eigen_assert(!t->f);
- // We already did best-effort emptiness check in Steal, so prepare for
- // blocking.
- ec_.Prewait(waiter);
- // Now do a reliable emptiness check.
- int victim = NonEmptyQueueIndex();
- if (victim != -1) {
- ec_.CancelWait(waiter);
- *t = queues_[victim]->PopBack();
- return true;
- }
- // Number of blocked threads is used as termination condition.
- // If we are shutting down and all worker threads blocked without work,
- // that's we are done.
- blocked_++;
- if (done_ && blocked_ == threads_.size()) {
- ec_.CancelWait(waiter);
- // Almost done, but need to re-check queues.
- // Consider that all queues are empty and all worker threads are preempted
- // right after incrementing blocked_ above. Now a free-standing thread
- // submits work and calls destructor (which sets done_). If we don't
- // re-check queues, we will exit leaving the work unexecuted.
- if (NonEmptyQueueIndex() != -1) {
- // Note: we must not pop from queues before we decrement blocked_,
- // otherwise the following scenario is possible. Consider that instead
- // of checking for emptiness we popped the only element from queues.
- // Now other worker threads can start exiting, which is bad if the
- // work item submits other work. So we just check emptiness here,
- // which ensures that all worker threads exit at the same time.
- blocked_--;
- return true;
- }
- // Reached stable termination state.
- ec_.Notify(true);
- return false;
- }
- ec_.CommitWait(waiter);
- blocked_--;
- return true;
- }
-
- int NonEmptyQueueIndex() {
- PerThread* pt = GetPerThread();
- const size_t size = queues_.size();
- unsigned r = Rand(&pt->rand);
- unsigned inc = coprimes_[r % coprimes_.size()];
- unsigned victim = r % size;
- for (unsigned i = 0; i < size; i++) {
- if (!queues_[victim]->Empty()) {
- return victim;
- }
- victim += inc;
- if (victim >= size) {
- victim -= size;
- }
- }
- return -1;
- }
-
- static EIGEN_STRONG_INLINE PerThread* GetPerThread() {
- EIGEN_THREAD_LOCAL PerThread per_thread_;
- PerThread* pt = &per_thread_;
- return pt;
- }
-
- static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) {
- uint64_t current = *state;
- // Update the internal state
- *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
- // Generate the random output (using the PCG-XSH-RS scheme)
- return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
- }
-};
-
-typedef NonBlockingThreadPoolTempl<StlThreadEnvironment> NonBlockingThreadPool;
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
deleted file mode 100644
index 05ed76c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h
+++ /dev/null
@@ -1,210 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
-#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
-
-
-namespace Eigen {
-
-// RunQueue is a fixed-size, partially non-blocking deque or Work items.
-// Operations on front of the queue must be done by a single thread (owner),
-// operations on back of the queue can be done by multiple threads concurrently.
-//
-// Algorithm outline:
-// All remote threads operating on the queue back are serialized by a mutex.
-// This ensures that at most two threads access state: owner and one remote
-// thread (Size aside). The algorithm ensures that the occupied region of the
-// underlying array is logically continuous (can wraparound, but no stray
-// occupied elements). Owner operates on one end of this region, remote thread
-// operates on the other end. Synchronization between these threads
-// (potential consumption of the last element and take up of the last empty
-// element) happens by means of state variable in each element. States are:
-// empty, busy (in process of insertion of removal) and ready. Threads claim
-// elements (empty->busy and ready->busy transitions) by means of a CAS
-// operation. The finishing transition (busy->empty and busy->ready) are done
-// with plain store as the element is exclusively owned by the current thread.
-//
-// Note: we could permit only pointers as elements, then we would not need
-// separate state variable as null/non-null pointer value would serve as state,
-// but that would require malloc/free per operation for large, complex values
-// (and this is designed to store std::function<()>).
-template <typename Work, unsigned kSize>
-class RunQueue {
- public:
- RunQueue() : front_(0), back_(0) {
- // require power-of-two for fast masking
- eigen_assert((kSize & (kSize - 1)) == 0);
- eigen_assert(kSize > 2); // why would you do this?
- eigen_assert(kSize <= (64 << 10)); // leave enough space for counter
- for (unsigned i = 0; i < kSize; i++)
- array_[i].state.store(kEmpty, std::memory_order_relaxed);
- }
-
- ~RunQueue() { eigen_assert(Size() == 0); }
-
- // PushFront inserts w at the beginning of the queue.
- // If queue is full returns w, otherwise returns default-constructed Work.
- Work PushFront(Work w) {
- unsigned front = front_.load(std::memory_order_relaxed);
- Elem* e = &array_[front & kMask];
- uint8_t s = e->state.load(std::memory_order_relaxed);
- if (s != kEmpty ||
- !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
- return w;
- front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed);
- e->w = std::move(w);
- e->state.store(kReady, std::memory_order_release);
- return Work();
- }
-
- // PopFront removes and returns the first element in the queue.
- // If the queue was empty returns default-constructed Work.
- Work PopFront() {
- unsigned front = front_.load(std::memory_order_relaxed);
- Elem* e = &array_[(front - 1) & kMask];
- uint8_t s = e->state.load(std::memory_order_relaxed);
- if (s != kReady ||
- !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
- return Work();
- Work w = std::move(e->w);
- e->state.store(kEmpty, std::memory_order_release);
- front = ((front - 1) & kMask2) | (front & ~kMask2);
- front_.store(front, std::memory_order_relaxed);
- return w;
- }
-
- // PushBack adds w at the end of the queue.
- // If queue is full returns w, otherwise returns default-constructed Work.
- Work PushBack(Work w) {
- std::unique_lock<std::mutex> lock(mutex_);
- unsigned back = back_.load(std::memory_order_relaxed);
- Elem* e = &array_[(back - 1) & kMask];
- uint8_t s = e->state.load(std::memory_order_relaxed);
- if (s != kEmpty ||
- !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
- return w;
- back = ((back - 1) & kMask2) | (back & ~kMask2);
- back_.store(back, std::memory_order_relaxed);
- e->w = std::move(w);
- e->state.store(kReady, std::memory_order_release);
- return Work();
- }
-
- // PopBack removes and returns the last elements in the queue.
- // Can fail spuriously.
- Work PopBack() {
- if (Empty()) return Work();
- std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock);
- if (!lock) return Work();
- unsigned back = back_.load(std::memory_order_relaxed);
- Elem* e = &array_[back & kMask];
- uint8_t s = e->state.load(std::memory_order_relaxed);
- if (s != kReady ||
- !e->state.compare_exchange_strong(s, kBusy, std::memory_order_acquire))
- return Work();
- Work w = std::move(e->w);
- e->state.store(kEmpty, std::memory_order_release);
- back_.store(back + 1 + (kSize << 1), std::memory_order_relaxed);
- return w;
- }
-
- // PopBackHalf removes and returns half last elements in the queue.
- // Returns number of elements removed. But can also fail spuriously.
- unsigned PopBackHalf(std::vector<Work>* result) {
- if (Empty()) return 0;
- std::unique_lock<std::mutex> lock(mutex_, std::try_to_lock);
- if (!lock) return 0;
- unsigned back = back_.load(std::memory_order_relaxed);
- unsigned size = Size();
- unsigned mid = back;
- if (size > 1) mid = back + (size - 1) / 2;
- unsigned n = 0;
- unsigned start = 0;
- for (; static_cast<int>(mid - back) >= 0; mid--) {
- Elem* e = &array_[mid & kMask];
- uint8_t s = e->state.load(std::memory_order_relaxed);
- if (n == 0) {
- if (s != kReady ||
- !e->state.compare_exchange_strong(s, kBusy,
- std::memory_order_acquire))
- continue;
- start = mid;
- } else {
- // Note: no need to store temporal kBusy, we exclusively own these
- // elements.
- eigen_assert(s == kReady);
- }
- result->push_back(std::move(e->w));
- e->state.store(kEmpty, std::memory_order_release);
- n++;
- }
- if (n != 0)
- back_.store(start + 1 + (kSize << 1), std::memory_order_relaxed);
- return n;
- }
-
- // Size returns current queue size.
- // Can be called by any thread at any time.
- unsigned Size() const {
- // Emptiness plays critical role in thread pool blocking. So we go to great
- // effort to not produce false positives (claim non-empty queue as empty).
- for (;;) {
- // Capture a consistent snapshot of front/tail.
- unsigned front = front_.load(std::memory_order_acquire);
- unsigned back = back_.load(std::memory_order_acquire);
- unsigned front1 = front_.load(std::memory_order_relaxed);
- if (front != front1) continue;
- int size = (front & kMask2) - (back & kMask2);
- // Fix overflow.
- if (size < 0) size += 2 * kSize;
- // Order of modification in push/pop is crafted to make the queue look
- // larger than it is during concurrent modifications. E.g. pop can
- // decrement size before the corresponding push has incremented it.
- // So the computed size can be up to kSize + 1, fix it.
- if (size > static_cast<int>(kSize)) size = kSize;
- return size;
- }
- }
-
- // Empty tests whether container is empty.
- // Can be called by any thread at any time.
- bool Empty() const { return Size() == 0; }
-
- private:
- static const unsigned kMask = kSize - 1;
- static const unsigned kMask2 = (kSize << 1) - 1;
- struct Elem {
- std::atomic<uint8_t> state;
- Work w;
- };
- enum {
- kEmpty,
- kBusy,
- kReady,
- };
- std::mutex mutex_;
- // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of
- // front/back, repsectively. The remaining bits contain modification counters
- // that are incremented on Push operations. This allows us to (1) distinguish
- // between empty and full conditions (if we would use log(kSize) bits for
- // position, these conditions would be indistinguishable); (2) obtain
- // consistent snapshot of front_/back_ for Size operation using the
- // modification counters.
- std::atomic<unsigned> front_;
- std::atomic<unsigned> back_;
- Elem array_[kSize];
-
- RunQueue(const RunQueue&) = delete;
- void operator=(const RunQueue&) = delete;
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h
deleted file mode 100644
index e75d0f4..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H
-#define EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H
-
-namespace Eigen {
-
-// The implementation of the ThreadPool type ensures that the Schedule method
-// runs the functions it is provided in FIFO order when the scheduling is done
-// by a single thread.
-// Environment provides a way to create threads and also allows to intercept
-// task submission and execution.
-template <typename Environment>
-class SimpleThreadPoolTempl : public ThreadPoolInterface {
- public:
- // Construct a pool that contains "num_threads" threads.
- explicit SimpleThreadPoolTempl(int num_threads, Environment env = Environment())
- : env_(env), threads_(num_threads), waiters_(num_threads) {
- for (int i = 0; i < num_threads; i++) {
- threads_.push_back(env.CreateThread([this, i]() { WorkerLoop(i); }));
- }
- }
-
- // Wait until all scheduled work has finished and then destroy the
- // set of threads.
- ~SimpleThreadPoolTempl() {
- {
- // Wait for all work to get done.
- std::unique_lock<std::mutex> l(mu_);
- while (!pending_.empty()) {
- empty_.wait(l);
- }
- exiting_ = true;
-
- // Wakeup all waiters.
- for (auto w : waiters_) {
- w->ready = true;
- w->task.f = nullptr;
- w->cv.notify_one();
- }
- }
-
- // Wait for threads to finish.
- for (auto t : threads_) {
- delete t;
- }
- }
-
- // Schedule fn() for execution in the pool of threads. The functions are
- // executed in the order in which they are scheduled.
- void Schedule(std::function<void()> fn) final {
- Task t = env_.CreateTask(std::move(fn));
- std::unique_lock<std::mutex> l(mu_);
- if (waiters_.empty()) {
- pending_.push_back(std::move(t));
- } else {
- Waiter* w = waiters_.back();
- waiters_.pop_back();
- w->ready = true;
- w->task = std::move(t);
- w->cv.notify_one();
- }
- }
-
- int NumThreads() const final {
- return static_cast<int>(threads_.size());
- }
-
- int CurrentThreadId() const final {
- const PerThread* pt = this->GetPerThread();
- if (pt->pool == this) {
- return pt->thread_id;
- } else {
- return -1;
- }
- }
-
- protected:
- void WorkerLoop(int thread_id) {
- std::unique_lock<std::mutex> l(mu_);
- PerThread* pt = GetPerThread();
- pt->pool = this;
- pt->thread_id = thread_id;
- Waiter w;
- Task t;
- while (!exiting_) {
- if (pending_.empty()) {
- // Wait for work to be assigned to me
- w.ready = false;
- waiters_.push_back(&w);
- while (!w.ready) {
- w.cv.wait(l);
- }
- t = w.task;
- w.task.f = nullptr;
- } else {
- // Pick up pending work
- t = std::move(pending_.front());
- pending_.pop_front();
- if (pending_.empty()) {
- empty_.notify_all();
- }
- }
- if (t.f) {
- mu_.unlock();
- env_.ExecuteTask(t);
- t.f = nullptr;
- mu_.lock();
- }
- }
- }
-
- private:
- typedef typename Environment::Task Task;
- typedef typename Environment::EnvThread Thread;
-
- struct Waiter {
- std::condition_variable cv;
- Task task;
- bool ready;
- };
-
- struct PerThread {
- constexpr PerThread() : pool(NULL), thread_id(-1) { }
- SimpleThreadPoolTempl* pool; // Parent pool, or null for normal threads.
- int thread_id; // Worker thread index in pool.
- };
-
- Environment env_;
- std::mutex mu_;
- MaxSizeVector<Thread*> threads_; // All threads
- MaxSizeVector<Waiter*> waiters_; // Stack of waiting threads.
- std::deque<Task> pending_; // Queue of pending work
- std::condition_variable empty_; // Signaled on pending_.empty()
- bool exiting_ = false;
-
- PerThread* GetPerThread() const {
- EIGEN_THREAD_LOCAL PerThread per_thread;
- return &per_thread;
- }
-};
-
-typedef SimpleThreadPoolTempl<StlThreadEnvironment> SimpleThreadPool;
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_SIMPLE_THREAD_POOL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
deleted file mode 100644
index 399f95c..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
-#define EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
-
-namespace Eigen {
-
-struct StlThreadEnvironment {
- struct Task {
- std::function<void()> f;
- };
-
- // EnvThread constructor must start the thread,
- // destructor must join the thread.
- class EnvThread {
- public:
- EnvThread(std::function<void()> f) : thr_(std::move(f)) {}
- ~EnvThread() { thr_.join(); }
-
- private:
- std::thread thr_;
- };
-
- EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); }
- Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; }
- void ExecuteTask(const Task& t) { t.f(); }
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_THREAD_ENVIRONMENT_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
deleted file mode 100644
index cfa2217..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
-#define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
-
-// Try to come up with a portable implementation of thread local variables
-#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)
-#define EIGEN_THREAD_LOCAL static __thread
-#elif EIGEN_COMP_CLANG
-#define EIGEN_THREAD_LOCAL static __thread
-#else
-#define EIGEN_THREAD_LOCAL static thread_local
-#endif
-
-#endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h
deleted file mode 100644
index a65ee97..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
-#define EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
-
-namespace Eigen {
-
-// This defines an interface that ThreadPoolDevice can take to use
-// custom thread pools underneath.
-class ThreadPoolInterface {
- public:
- virtual void Schedule(std::function<void()> fn) = 0;
-
- // Returns the number of threads in the pool.
- virtual int NumThreads() const = 0;
-
- // Returns a logical thread index between 0 and NumThreads() - 1 if called
- // from one of the threads in the pool. Returns -1 otherwise.
- virtual int CurrentThreadId() const = 0;
-
- virtual ~ThreadPoolInterface() {}
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_CXX11_THREADPOOL_THREAD_POOL_INTERFACE_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h b/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h
deleted file mode 100644
index a859c7b..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H
-#define EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H
-
-// Try to come up with a portable way to yield
-#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7)
-#define EIGEN_THREAD_YIELD() sched_yield()
-#else
-#define EIGEN_THREAD_YIELD() std::this_thread::yield()
-#endif
-
-#endif // EIGEN_CXX11_THREADPOOL_THREAD_YIELD_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h b/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h
deleted file mode 100644
index ec27edd..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h
+++ /dev/null
@@ -1,542 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11META_H
-#define EIGEN_CXX11META_H
-
-#include <vector>
-#include "EmulateArray.h"
-
-// Emulate the cxx11 functionality that we need if the compiler doesn't support it.
-// Visual studio 2015 doesn't advertise itself as cxx11 compliant, although it
-// supports enough of the standard for our needs
-#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900
-
-#include "CXX11Workarounds.h"
-
-namespace Eigen {
-
-namespace internal {
-
-/** \internal
- * \file CXX11/util/CXX11Meta.h
- * This file contains generic metaprogramming classes which are not specifically related to Eigen.
- * This file expands upon Core/util/Meta.h and adds support for C++11 specific features.
- */
-
-template<typename... tt>
-struct type_list { constexpr static int count = sizeof...(tt); };
-
-template<typename t, typename... tt>
-struct type_list<t, tt...> { constexpr static int count = sizeof...(tt) + 1; typedef t first_type; };
-
-template<typename T, T... nn>
-struct numeric_list { constexpr static std::size_t count = sizeof...(nn); };
-
-template<typename T, T n, T... nn>
-struct numeric_list<T, n, nn...> { constexpr static std::size_t count = sizeof...(nn) + 1; constexpr static T first_value = n; };
-
-/* numeric list constructors
- *
- * equivalencies:
- * constructor result
- * typename gen_numeric_list<int, 5>::type numeric_list<int, 0,1,2,3,4>
- * typename gen_numeric_list_reversed<int, 5>::type numeric_list<int, 4,3,2,1,0>
- * typename gen_numeric_list_swapped_pair<int, 5,1,2>::type numeric_list<int, 0,2,1,3,4>
- * typename gen_numeric_list_repeated<int, 0, 5>::type numeric_list<int, 0,0,0,0,0>
- */
-
-template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list : gen_numeric_list<T, n-1, start, start + n-1, ii...> {};
-template<typename T, T start, T... ii> struct gen_numeric_list<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T start = 0, T... ii> struct gen_numeric_list_reversed : gen_numeric_list_reversed<T, n-1, start, ii..., start + n-1> {};
-template<typename T, T start, T... ii> struct gen_numeric_list_reversed<T, 0, start, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T a, T b, T start = 0, T... ii> struct gen_numeric_list_swapped_pair : gen_numeric_list_swapped_pair<T, n-1, a, b, start, (start + n-1) == a ? b : ((start + n-1) == b ? a : (start + n-1)), ii...> {};
-template<typename T, T a, T b, T start, T... ii> struct gen_numeric_list_swapped_pair<T, 0, a, b, start, ii...> { typedef numeric_list<T, ii...> type; };
-
-template<typename T, std::size_t n, T V, T... nn> struct gen_numeric_list_repeated : gen_numeric_list_repeated<T, n-1, V, V, nn...> {};
-template<typename T, T V, T... nn> struct gen_numeric_list_repeated<T, 0, V, nn...> { typedef numeric_list<T, nn...> type; };
-
-/* list manipulation: concatenate */
-
-template<class a, class b> struct concat;
-
-template<typename... as, typename... bs> struct concat<type_list<as...>, type_list<bs...>> { typedef type_list<as..., bs...> type; };
-template<typename T, T... as, T... bs> struct concat<numeric_list<T, as...>, numeric_list<T, bs...> > { typedef numeric_list<T, as..., bs...> type; };
-
-template<typename... p> struct mconcat;
-template<typename a> struct mconcat<a> { typedef a type; };
-template<typename a, typename b> struct mconcat<a, b> : concat<a, b> {};
-template<typename a, typename b, typename... cs> struct mconcat<a, b, cs...> : concat<a, typename mconcat<b, cs...>::type> {};
-
-/* list manipulation: extract slices */
-
-template<int n, typename x> struct take;
-template<int n, typename a, typename... as> struct take<n, type_list<a, as...>> : concat<type_list<a>, typename take<n-1, type_list<as...>>::type> {};
-template<int n> struct take<n, type_list<>> { typedef type_list<> type; };
-template<typename a, typename... as> struct take<0, type_list<a, as...>> { typedef type_list<> type; };
-template<> struct take<0, type_list<>> { typedef type_list<> type; };
-
-template<typename T, int n, T a, T... as> struct take<n, numeric_list<T, a, as...>> : concat<numeric_list<T, a>, typename take<n-1, numeric_list<T, as...>>::type> {};
-template<typename T, int n> struct take<n, numeric_list<T>> { typedef numeric_list<T> type; };
-template<typename T, T a, T... as> struct take<0, numeric_list<T, a, as...>> { typedef numeric_list<T> type; };
-template<typename T> struct take<0, numeric_list<T>> { typedef numeric_list<T> type; };
-
-template<typename T, int n, T... ii> struct h_skip_helper_numeric;
-template<typename T, int n, T i, T... ii> struct h_skip_helper_numeric<T, n, i, ii...> : h_skip_helper_numeric<T, n-1, ii...> {};
-template<typename T, T i, T... ii> struct h_skip_helper_numeric<T, 0, i, ii...> { typedef numeric_list<T, i, ii...> type; };
-template<typename T, int n> struct h_skip_helper_numeric<T, n> { typedef numeric_list<T> type; };
-template<typename T> struct h_skip_helper_numeric<T, 0> { typedef numeric_list<T> type; };
-
-template<int n, typename... tt> struct h_skip_helper_type;
-template<int n, typename t, typename... tt> struct h_skip_helper_type<n, t, tt...> : h_skip_helper_type<n-1, tt...> {};
-template<typename t, typename... tt> struct h_skip_helper_type<0, t, tt...> { typedef type_list<t, tt...> type; };
-template<int n> struct h_skip_helper_type<n> { typedef type_list<> type; };
-template<> struct h_skip_helper_type<0> { typedef type_list<> type; };
-
-template<int n>
-struct h_skip {
- template<typename T, T... ii>
- constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
- template<typename... tt>
- constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
-};
-
-template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; };
-
-template<int start, int count, typename a> struct slice : take<count, typename skip<start, a>::type> {};
-
-/* list manipulation: retrieve single element from list */
-
-template<int n, typename x> struct get;
-
-template<int n, typename a, typename... as> struct get<n, type_list<a, as...>> : get<n-1, type_list<as...>> {};
-template<typename a, typename... as> struct get<0, type_list<a, as...>> { typedef a type; };
-
-template<typename T, int n, T a, T... as> struct get<n, numeric_list<T, a, as...>> : get<n-1, numeric_list<T, as...>> {};
-template<typename T, T a, T... as> struct get<0, numeric_list<T, a, as...>> { constexpr static T value = a; };
-
-/* always get type, regardless of dummy; good for parameter pack expansion */
-
-template<typename T, T dummy, typename t> struct id_numeric { typedef t type; };
-template<typename dummy, typename t> struct id_type { typedef t type; };
-
-/* equality checking, flagged version */
-
-template<typename a, typename b> struct is_same_gf : is_same<a, b> { constexpr static int global_flags = 0; };
-
-/* apply_op to list */
-
-template<
- bool from_left, // false
- template<typename, typename> class op,
- typename additional_param,
- typename... values
->
-struct h_apply_op_helper { typedef type_list<typename op<values, additional_param>::type...> type; };
-template<
- template<typename, typename> class op,
- typename additional_param,
- typename... values
->
-struct h_apply_op_helper<true, op, additional_param, values...> { typedef type_list<typename op<additional_param, values>::type...> type; };
-
-template<
- bool from_left,
- template<typename, typename> class op,
- typename additional_param
->
-struct h_apply_op
-{
- template<typename... values>
- constexpr static typename h_apply_op_helper<from_left, op, additional_param, values...>::type helper(type_list<values...>)
- { return typename h_apply_op_helper<from_left, op, additional_param, values...>::type(); }
-};
-
-template<
- template<typename, typename> class op,
- typename additional_param,
- typename a
->
-struct apply_op_from_left { typedef decltype(h_apply_op<true, op, additional_param>::helper(a())) type; };
-
-template<
- template<typename, typename> class op,
- typename additional_param,
- typename a
->
-struct apply_op_from_right { typedef decltype(h_apply_op<false, op, additional_param>::helper(a())) type; };
-
-/* see if an element is in a list */
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename h_list,
- bool last_check_positive = false
->
-struct contained_in_list;
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename h_list
->
-struct contained_in_list<test, check_against, h_list, true>
-{
- constexpr static bool value = true;
-};
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename a,
- typename... as
->
-struct contained_in_list<test, check_against, type_list<a, as...>, false> : contained_in_list<test, check_against, type_list<as...>, test<check_against, a>::value> {};
-
-template<
- template<typename, typename> class test,
- typename check_against
- EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty)
->
-struct contained_in_list<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, false> { constexpr static bool value = false; };
-
-/* see if an element is in a list and check for global flags */
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename h_list,
- int default_flags = 0,
- bool last_check_positive = false,
- int last_check_flags = default_flags
->
-struct contained_in_list_gf;
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename h_list,
- int default_flags,
- int last_check_flags
->
-struct contained_in_list_gf<test, check_against, h_list, default_flags, true, last_check_flags>
-{
- constexpr static bool value = true;
- constexpr static int global_flags = last_check_flags;
-};
-
-template<
- template<typename, typename> class test,
- typename check_against,
- typename a,
- typename... as,
- int default_flags,
- int last_check_flags
->
-struct contained_in_list_gf<test, check_against, type_list<a, as...>, default_flags, false, last_check_flags> : contained_in_list_gf<test, check_against, type_list<as...>, default_flags, test<check_against, a>::value, test<check_against, a>::global_flags> {};
-
-template<
- template<typename, typename> class test,
- typename check_against
- EIGEN_TPL_PP_SPEC_HACK_DEFC(typename, empty),
- int default_flags,
- int last_check_flags
->
-struct contained_in_list_gf<test, check_against, type_list<EIGEN_TPL_PP_SPEC_HACK_USE(empty)>, default_flags, false, last_check_flags> { constexpr static bool value = false; constexpr static int global_flags = default_flags; };
-
-/* generic reductions */
-
-template<
- typename Reducer,
- typename... Ts
-> struct reduce;
-
-template<
- typename Reducer
-> struct reduce<Reducer>
-{
- constexpr static inline int run() { return Reducer::Identity; }
-};
-
-template<
- typename Reducer,
- typename A
-> struct reduce<Reducer, A>
-{
- constexpr static inline A run(A a) { return a; }
-};
-
-template<
- typename Reducer,
- typename A,
- typename... Ts
-> struct reduce<Reducer, A, Ts...>
-{
- constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) {
- return Reducer::run(a, reduce<Reducer, Ts...>::run(ts...));
- }
-};
-
-/* generic binary operations */
-
-struct sum_op {
- template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; }
- static constexpr int Identity = 0;
-};
-struct product_op {
- template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; }
- static constexpr int Identity = 1;
-};
-
-struct logical_and_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } };
-struct logical_or_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } };
-
-struct equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } };
-struct not_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } };
-struct lesser_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } };
-struct lesser_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } };
-struct greater_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } };
-struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } };
-
-/* generic unary operations */
-
-struct not_op { template<typename A> constexpr static inline auto run(A a) -> decltype(!a) { return !a; } };
-struct negation_op { template<typename A> constexpr static inline auto run(A a) -> decltype(-a) { return -a; } };
-struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } };
-
-
-/* reductions for lists */
-
-// using auto -> return value spec makes ICC 13.0 and 13.1 crash here, so we have to hack it
-// together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1
-// does...
-template<typename... Ts>
-constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
-{
- return reduce<product_op, Ts...>::run(ts...);
-}
-
-template<typename... Ts>
-constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
-{
- return reduce<sum_op, Ts...>::run(ts...);
-}
-
-/* reverse arrays */
-
-template<typename Array, int... n>
-constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>)
-{
- return {{array_get<sizeof...(n) - n - 1>(arr)...}};
-}
-
-template<typename T, std::size_t N>
-constexpr inline array<T, N> array_reverse(array<T, N> arr)
-{
- return h_array_reverse(arr, typename gen_numeric_list<int, N>::type());
-}
-
-
-/* generic array reductions */
-
-// can't reuse standard reduce() interface above because Intel's Compiler
-// *really* doesn't like it, so we just reimplement the stuff
-// (start from N - 1 and work down to 0 because specialization for
-// n == N - 1 also doesn't work in Intel's compiler, so it goes into
-// an infinite loop)
-template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1>
-struct h_array_reduce {
- EIGEN_DEVICE_FUNC constexpr static inline auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)))
- {
- return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr));
- }
-};
-
-template<typename Reducer, typename T, std::size_t N>
-struct h_array_reduce<Reducer, T, N, 0>
-{
- EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, N>& arr, T)
- {
- return array_get<0>(arr);
- }
-};
-
-template<typename Reducer, typename T>
-struct h_array_reduce<Reducer, T, 0>
-{
- EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, 0>&, T identity)
- {
- return identity;
- }
-};
-
-template<typename Reducer, typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity))
-{
- return h_array_reduce<Reducer, T, N>::run(arr, identity);
-}
-
-/* standard array reductions */
-
-template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0)))
-{
- return array_reduce<sum_op, T, N>(arr, static_cast<T>(0));
-}
-
-template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1)))
-{
- return array_reduce<product_op, T, N>(arr, static_cast<T>(1));
-}
-
-template<typename t>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
- eigen_assert(a.size() > 0);
- t prod = 1;
- for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; }
- return prod;
-}
-
-/* zip an array */
-
-template<typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
-{
- return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
-}
-
-template<typename Op, typename A, typename B, std::size_t N>
-constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b)
-{
- return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type());
-}
-
-/* zip an array and reduce the result */
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
-{
- return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...);
-}
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
-{
- return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type());
-}
-
-/* apply stuff to an array */
-
-template<typename Op, typename A, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
-{
- return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
-}
-
-template<typename Op, typename A, std::size_t N>
-constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
-{
- return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type());
-}
-
-/* apply stuff to an array and reduce */
-
-template<typename Reducer, typename Op, typename A, std::size_t N, int... n>
-constexpr inline auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
-{
- return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...);
-}
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-constexpr inline auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
-{
- return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type());
-}
-
-/* repeat a value n times (and make an array out of it
- * usage:
- * array<int, 16> = repeat<16>(42);
- */
-
-template<int n>
-struct h_repeat
-{
- template<typename t, int... ii>
- constexpr static inline array<t, n> run(t v, numeric_list<int, ii...>)
- {
- return {{ typename id_numeric<int, ii, t>::type(v)... }};
- }
-};
-
-template<int n, typename t>
-constexpr array<t, n> repeat(t v) { return h_repeat<n>::run(v, typename gen_numeric_list<int, n>::type()); }
-
-/* instantiate a class by a C-style array */
-template<class InstType, typename ArrType, std::size_t N, bool Reverse, typename... Ps>
-struct h_instantiate_by_c_array;
-
-template<class InstType, typename ArrType, std::size_t N, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, N, false, Ps...>
-{
- static InstType run(ArrType* arr, Ps... args)
- {
- return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, Ps..., ArrType>::run(arr + 1, args..., arr[0]);
- }
-};
-
-template<class InstType, typename ArrType, std::size_t N, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, N, true, Ps...>
-{
- static InstType run(ArrType* arr, Ps... args)
- {
- return h_instantiate_by_c_array<InstType, ArrType, N - 1, false, ArrType, Ps...>::run(arr + 1, arr[0], args...);
- }
-};
-
-template<class InstType, typename ArrType, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, 0, false, Ps...>
-{
- static InstType run(ArrType* arr, Ps... args)
- {
- (void)arr;
- return InstType(args...);
- }
-};
-
-template<class InstType, typename ArrType, typename... Ps>
-struct h_instantiate_by_c_array<InstType, ArrType, 0, true, Ps...>
-{
- static InstType run(ArrType* arr, Ps... args)
- {
- (void)arr;
- return InstType(args...);
- }
-};
-
-template<class InstType, typename ArrType, std::size_t N, bool Reverse = false>
-InstType instantiate_by_c_array(ArrType* arr)
-{
- return h_instantiate_by_c_array<InstType, ArrType, N, Reverse>::run(arr);
-}
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#else // Non C++11, fallback to emulation mode
-
-#include "EmulateCXX11Meta.h"
-
-#endif
-
-#endif // EIGEN_CXX11META_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h b/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h
deleted file mode 100644
index fe4d228..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11WORKAROUNDS_H
-#define EIGEN_CXX11WORKAROUNDS_H
-
-/* COMPATIBILITY CHECKS
- * (so users of compilers that are too old get some realistic error messages)
- */
-#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1310)
-#error Intel Compiler only supports required C++ features since version 13.1.
-// note that most stuff in principle works with 13.0 but when combining
-// some features, at some point 13.0 will just fail with an internal assertion
-#elif defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6))
-// G++ < 4.6 by default will continue processing the source files - even if we use #error to make
-// it error out. For this reason, we use the pragma to make sure G++ aborts at the first error
-// it sees. Unfortunately, that is still not our #error directive, but at least the output is
-// short enough the user has a chance to see that the compiler version is not sufficient for
-// the funky template mojo we use.
-#pragma GCC diagnostic error "-Wfatal-errors"
-#error GNU C++ Compiler (g++) only supports required C++ features since version 4.6.
-#endif
-
-/* Check that the compiler at least claims to support C++11. It might not be sufficient
- * because the compiler may not implement it correctly, but at least we'll know.
- * On the other hand, visual studio still doesn't claim to support C++11 although it's
- * compliant enugh for our purpose.
- */
-#if (__cplusplus <= 199711L) && (EIGEN_COMP_MSVC < 1900)
-#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#pragma GCC diagnostic error "-Wfatal-errors"
-#endif
-#error This library needs at least a C++11 compliant compiler. If you use g++/clang, please enable the -std=c++11 compiler flag. (-std=c++0x on older versions.)
-#endif
-
-namespace Eigen {
-
-namespace internal {
-
-/* std::get is only constexpr in C++14, not yet in C++11
- */
-
-
-template<std::size_t I, class T> constexpr inline T& array_get(std::vector<T>& a) { return a[I]; }
-template<std::size_t I, class T> constexpr inline T&& array_get(std::vector<T>&& a) { return a[I]; }
-template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; }
-
-/* Suppose you have a template of the form
- * template<typename T> struct X;
- * And you want to specialize it in such a way:
- * template<typename S1, typename... SN> struct X<Foo<S1, SN...>> { ::: };
- * template<> struct X<Foo<>> { ::: };
- * This will work in Intel's compiler 13.0, but only to some extent in g++ 4.6, since
- * g++ can only match templates called with parameter packs if the number of template
- * arguments is not a fixed size (so inside the first specialization, referencing
- * X<Foo<Sn...>> will fail in g++). On the other hand, g++ will accept the following:
- * template<typename S...> struct X<Foo<S...>> { ::: }:
- * as an additional (!) specialization, which will then only match the empty case.
- * But Intel's compiler 13.0 won't accept that, it will only accept the empty syntax,
- * so we have to create a workaround for this.
- */
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n) mt... n
-#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n) , EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_USE(n) n...
-#define EIGEN_TPL_PP_SPEC_HACK_USEC(n) , n...
-#else
-#define EIGEN_TPL_PP_SPEC_HACK_DEF(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_DEFC(mt, n)
-#define EIGEN_TPL_PP_SPEC_HACK_USE(n)
-#define EIGEN_TPL_PP_SPEC_HACK_USEC(n)
-#endif
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11WORKAROUNDS_H
-
-/*
- * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
- */
diff --git a/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h
deleted file mode 100644
index 30d3ebc..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h
+++ /dev/null
@@ -1,267 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_EMULATE_ARRAY_H
-#define EIGEN_EMULATE_ARRAY_H
-
-
-
-// The array class is only available starting with cxx11. Emulate our own here
-// if needed. Beware, msvc still doesn't advertise itself as a c++11 compiler!
-// Moreover, CUDA doesn't support the STL containers, so we use our own instead.
-#if (__cplusplus <= 199711L && EIGEN_COMP_MSVC < 1900) || defined(__CUDACC__) || defined(EIGEN_AVOID_STL_ARRAY)
-
-namespace Eigen {
-template <typename T, size_t n> class array {
- public:
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& operator[] (size_t index) { return values[index]; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& front() { return values[0]; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& front() const { return values[0]; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& back() { return values[n-1]; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- static std::size_t size() { return n; }
-
- T values[n];
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array() { }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v) {
- EIGEN_STATIC_ASSERT(n==1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2) {
- EIGEN_STATIC_ASSERT(n==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3) {
- EIGEN_STATIC_ASSERT(n==3, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3,
- const T& v4) {
- EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- values[3] = v4;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
- const T& v5) {
- EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- values[3] = v4;
- values[4] = v5;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
- const T& v5, const T& v6) {
- EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- values[3] = v4;
- values[4] = v5;
- values[5] = v6;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
- const T& v5, const T& v6, const T& v7) {
- EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- values[3] = v4;
- values[4] = v5;
- values[5] = v6;
- values[6] = v7;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(
- const T& v1, const T& v2, const T& v3, const T& v4,
- const T& v5, const T& v6, const T& v7, const T& v8) {
- EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE)
- values[0] = v1;
- values[1] = v2;
- values[2] = v3;
- values[3] = v4;
- values[4] = v5;
- values[5] = v6;
- values[6] = v7;
- values[7] = v8;
- }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array(std::initializer_list<T> l) {
- eigen_assert(l.size() == n);
- internal::smart_copy(l.begin(), l.end(), values);
- }
-#endif
-};
-
-
-// Specialize array for zero size
-template <typename T> class array<T, 0> {
- public:
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& operator[] (size_t) {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& operator[] (size_t) const {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& front() {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& front() const {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE T& back() {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const T& back() const {
- eigen_assert(false && "Can't index a zero size array");
- return dummy;
- }
-
- static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE array() : dummy() { }
-
-#if EIGEN_HAS_VARIADIC_TEMPLATES
- EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
- eigen_assert(l.size() == 0);
- }
-#endif
-
- private:
- T dummy;
-};
-
-// Comparison operator
-// Todo: implement !=, <, <=, >, and >=
-template<class T, std::size_t N>
-EIGEN_DEVICE_FUNC bool operator==(const array<T,N>& lhs, const array<T,N>& rhs) {
- for (std::size_t i = 0; i < N; ++i) {
- if (lhs[i] != rhs[i]) {
- return false;
- }
- }
- return true;
-}
-
-
-namespace internal {
-template<std::size_t I, class T, std::size_t N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) {
- return a[I];
-}
-template<std::size_t I, class T, std::size_t N>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
- return a[I];
-}
-
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<array<T,N> > {
- static const size_t value = N;
-};
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<array<T,N>& > {
- static const size_t value = N;
-};
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<const array<T,N> > {
- static const size_t value = N;
-};
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<const array<T,N>& > {
- static const size_t value = N;
-};
-
-} // end namespace internal
-} // end namespace Eigen
-
-#else
-
-// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen::array
-#include <array>
-namespace Eigen {
-
-template <typename T, std::size_t N> using array = std::array<T, N>;
-
-namespace internal {
-/* std::get is only constexpr in C++14, not yet in C++11
- * - libstdc++ from version 4.7 onwards has it nevertheless,
- * so use that
- * - libstdc++ older versions: use _M_instance directly
- * - libc++ all versions so far: use __elems_ directly
- * - all other libs: use std::get to be portable, but
- * this may not be constexpr
- */
-#if defined(__GLIBCXX__) && __GLIBCXX__ < 20120322
-#define STD_GET_ARR_HACK a._M_instance[I]
-#elif defined(_LIBCPP_VERSION)
-#define STD_GET_ARR_HACK a.__elems_[I]
-#else
-#define STD_GET_ARR_HACK std::template get<I, T, N>(a)
-#endif
-
-template<std::size_t I, class T, std::size_t N> constexpr inline T& array_get(std::array<T,N>& a) { return (T&) STD_GET_ARR_HACK; }
-template<std::size_t I, class T, std::size_t N> constexpr inline T&& array_get(std::array<T,N>&& a) { return (T&&) STD_GET_ARR_HACK; }
-template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; }
-
-#undef STD_GET_ARR_HACK
-
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<const std::array<T,N> > {
- static const size_t value = N;
-};
-template <typename T> struct array_size;
-template<class T, std::size_t N> struct array_size<std::array<T,N> > {
- static const size_t value = N;
-};
-} // end namespace internal
-} // end namespace Eigen
-
-#endif
-
-#endif // EIGEN_EMULATE_ARRAY_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h b/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h
deleted file mode 100644
index 8a536fa..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h
+++ /dev/null
@@ -1,311 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_EMULATE_CXX11_META_H
-#define EIGEN_EMULATE_CXX11_META_H
-
-
-
-namespace Eigen {
-
-namespace internal {
-
-/** \internal
- * \file CXX11/util/EmulateCXX11Meta.h
- * This file emulates a subset of the functionality provided by CXXMeta.h for
- * compilers that don't yet support cxx11 such as nvcc.
- */
-
-struct empty_list { static const std::size_t count = 0; };
-
-template<typename T, typename Tail=empty_list> struct type_list {
- typedef T HeadType;
- typedef Tail TailType;
- static const T head;
- static const Tail tail;
- static const std::size_t count = 1 + Tail::count;
-};
-
-struct null_type { };
-
-template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type,
- typename T4 = null_type, typename T5 = null_type, typename T6 = null_type,
- typename T7 = null_type, typename T8 = null_type>
-struct make_type_list {
- typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult;
-
- typedef type_list<T1, tailresult> type;
-};
-
-template<> struct make_type_list<> {
- typedef empty_list type;
-};
-
-
-template <std::size_t index, class TList> struct get_type;
-
-template <class Head, class Tail>
-struct get_type<0, type_list<Head, Tail> >
-{
- typedef Head type;
-};
-
-template <std::size_t i, class Head, class Tail>
-struct get_type<i, type_list<Head, Tail> >
-{
- typedef typename get_type<i-1, Tail>::type type;
-};
-
-
-/* numeric list */
-template <typename T, T n>
-struct type2val {
- typedef T type;
- static const T value = n;
-};
-
-
-template<typename T, size_t n, T V> struct gen_numeric_list_repeated;
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 1, V> {
- typedef typename make_type_list<type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 2, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 3, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 4, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
- type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
- type2val<T, V>, type2val<T, V>, type2val<T, V>,
- type2val<T, V> >::type type;
-};
-
-template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> {
- typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
- type2val<T, V>, type2val<T, V>, type2val<T, V>,
- type2val<T, V>, type2val<T, V> >::type type;
-};
-
-
-template <std::size_t index, class NList> struct get;
-
-template <std::size_t i>
-struct get<i, empty_list>
-{
- get() { eigen_assert(false && "index overflow"); }
- typedef void type;
- static const char value = '\0';
-};
-
-template <std::size_t i, class Head>
-struct get<i, type_list<Head, empty_list> >
-{
- get() { eigen_assert(false && "index overflow"); }
- typedef void type;
- static const char value = '\0';
-};
-
-template <class Head>
-struct get<0, type_list<Head, empty_list> >
-{
- typedef typename Head::type type;
- static const type value = Head::value;
-};
-
-template <class Head, class Tail>
-struct get<0, type_list<Head, Tail> >
-{
- typedef typename Head::type type;
- static const type value = Head::value;
-};
-
-template <std::size_t i, class Head, class Tail>
-struct get<i, type_list<Head, Tail> >
-{
- typedef typename Tail::HeadType::type type;
- static const type value = get<i-1, Tail>::value;
-};
-
-
-template <class NList> struct arg_prod {
- static const typename NList::HeadType::type value = get<0, NList>::value * arg_prod<typename NList::TailType>::value;
-};
-template <> struct arg_prod<empty_list> {
- static const int value = 1;
-};
-
-
-template<int n, typename t>
-array<t, n> repeat(t v) {
- array<t, n> array;
- array.fill(v);
- return array;
-}
-
-template<std::size_t I, class Head, class Tail>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(type_list<Head, Tail>&) {
- return get<I, type_list<Head, Tail> >::value;
-}
-template<std::size_t I, class Head, class Tail>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_list<Head, Tail>&) {
- return get<I, type_list<Head, Tail> >::value;
-}
-
-template <class NList>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList&) {
- return arg_prod<NList>::value;
-}
-
-template<typename t, std::size_t n>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) {
- t prod = 1;
- for (size_t i = 0; i < n; ++i) { prod *= a[i]; }
- return prod;
-}
-template<typename t>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, 0>& /*a*/) {
- return 1;
-}
-
-template<typename t>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
- eigen_assert(a.size() > 0);
- t prod = 1;
- for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; }
- return prod;
-}
-
-
-template<std::size_t I, class T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) {
- return a[I];
-}
-template<std::size_t I, class T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) {
- return a[I];
-}
-
-struct sum_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a + b; }
-};
-struct product_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a * b; }
-};
-
-struct logical_and_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a && b; }
-};
-struct logical_or_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a || b; }
-};
-
-struct equal_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a == b; }
-};
-struct not_equal_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a != b; }
-};
-struct lesser_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a < b; }
-};
-struct lesser_equal_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a <= b; }
-};
-
-struct greater_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a > b; }
-};
-struct greater_equal_op {
- template<typename A, typename B> static inline bool run(A a, B b) { return a >= b; }
-};
-
-struct not_op {
- template<typename A> static inline bool run(A a) { return !a; }
-};
-struct negation_op {
- template<typename A> static inline bool run(A a) { return -a; }
-};
-struct greater_equal_zero_op {
- template<typename A> static inline bool run(A a) { return a >= 0; }
-};
-
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-struct ArrayApplyAndReduce {
- static inline bool run(const array<A, N>& a) {
- EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
- bool result = Reducer::run(Op::run(a[0]), Op::run(a[1]));
- for (size_t i = 2; i < N; ++i) {
- result = Reducer::run(result, Op::run(a[i]));
- }
- return result;
- }
-};
-
-template<typename Reducer, typename Op, typename A>
-struct ArrayApplyAndReduce<Reducer, Op, A, 1> {
- static inline bool run(const array<A, 1>& a) {
- return Op::run(a[0]);
- }
-};
-
-template<typename Reducer, typename Op, typename A, std::size_t N>
-inline bool array_apply_and_reduce(const array<A, N>& a) {
- return ArrayApplyAndReduce<Reducer, Op, A, N>::run(a);
-}
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-struct ArrayZipAndReduce {
- static inline bool run(const array<A, N>& a, const array<B, N>& b) {
- EIGEN_STATIC_ASSERT(N >= 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
- bool result = Reducer::run(Op::run(a[0], b[0]), Op::run(a[1], b[1]));
- for (size_t i = 2; i < N; ++i) {
- result = Reducer::run(result, Op::run(a[i], b[i]));
- }
- return result;
- }
-};
-
-template<typename Reducer, typename Op, typename A, typename B>
-struct ArrayZipAndReduce<Reducer, Op, A, B, 1> {
- static inline bool run(const array<A, 1>& a, const array<B, 1>& b) {
- return Op::run(a[0], b[0]);
- }
-};
-
-template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-inline bool array_zip_and_reduce(const array<A, N>& a, const array<B, N>& b) {
- return ArrayZipAndReduce<Reducer, Op, A, B, N>::run(a, b);
-}
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-
-
-#endif // EIGEN_EMULATE_CXX11_META_H
diff --git a/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
deleted file mode 100644
index 4bc3dd1..0000000
--- a/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_FIXEDSIZEVECTOR_H
-#define EIGEN_FIXEDSIZEVECTOR_H
-
-namespace Eigen {
-
-/** \class MaxSizeVector
- * \ingroup Core
- *
- * \brief The MaxSizeVector class.
- *
- * The %MaxSizeVector provides a subset of std::vector functionality.
- *
- * The goal is to provide basic std::vector operations when using
- * std::vector is not an option (e.g. on GPU or when compiling using
- * FMA/AVX, as this can cause either compilation failures or illegal
- * instruction failures).
- *
- * Beware: The constructors are not API compatible with these of
- * std::vector.
- */
-template <typename T>
-class MaxSizeVector {
- public:
- // Construct a new MaxSizeVector, reserve n elements.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- explicit MaxSizeVector(size_t n)
- : reserve_(n), size_(0),
- data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
- for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
- }
-
- // Construct a new MaxSizeVector, reserve and resize to n.
- // Copy the init value to all elements.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- MaxSizeVector(size_t n, const T& init)
- : reserve_(n), size_(n),
- data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
- for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- ~MaxSizeVector() {
- for (size_t i = 0; i < size_; ++i) {
- data_[i].~T();
- }
- internal::aligned_free(data_);
- }
-
- void resize(size_t n) {
- eigen_assert(n <= reserve_);
- for (size_t i = size_; i < n; ++i) {
- new (&data_[i]) T;
- }
- for (size_t i = n; i < size_; ++i) {
- data_[i].~T();
- }
- size_ = n;
- }
-
- // Append new elements (up to reserved size).
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void push_back(const T& t) {
- eigen_assert(size_ < reserve_);
- data_[size_++] = t;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const T& operator[] (size_t i) const {
- eigen_assert(i < size_);
- return data_[i];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T& operator[] (size_t i) {
- eigen_assert(i < size_);
- return data_[i];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T& back() {
- eigen_assert(size_ > 0);
- return data_[size_ - 1];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const T& back() const {
- eigen_assert(size_ > 0);
- return data_[size_ - 1];
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void pop_back() {
- // NOTE: This does not destroy the value at the end the way
- // std::vector's version of pop_back() does. That happens when
- // the Vector is destroyed.
- eigen_assert(size_ > 0);
- size_--;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- size_t size() const { return size_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- bool empty() const { return size_ == 0; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T* data() { return data_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const T* data() const { return data_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T* begin() { return data_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- T* end() { return data_ + size_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const T* begin() const { return data_; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const T* end() const { return data_ + size_; }
-
- private:
- size_t reserve_;
- size_t size_;
- T* data_;
-};
-
-} // namespace Eigen
-
-#endif // EIGEN_FIXEDSIZEVECTOR_H