Commit 2e9e1bab authored by Mccaskey, Alex

updating Eigen unsupported to latest from dev



Signed-off-by: Alex McCaskey <mccaskeyaj@ornl.gov>
parent 8ae5ae9e
Pipeline #39760 passed with stages in 10 minutes and 42 seconds
 add_subdirectory(Eigen)
 add_subdirectory(doc EXCLUDE_FROM_ALL)
-if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-  add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
-else()
-  add_subdirectory(test EXCLUDE_FROM_ALL)
+if(BUILD_TESTING)
+  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+    add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+  else()
+    add_subdirectory(test EXCLUDE_FROM_ALL)
+  endif()
 endif()
@@ -40,7 +40,7 @@
 # undef realloc
 #endif
-#include <Eigen/Core>
+#include "../../Eigen/Core"
 namespace Eigen {
@@ -10,7 +10,9 @@
 #ifndef EIGEN_ALIGNED_VECTOR3
 #define EIGEN_ALIGNED_VECTOR3
-#include <Eigen/Geometry>
+#include "../../Eigen/Geometry"
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
 namespace Eigen {
@@ -221,4 +223,6 @@ struct evaluator<AlignedVector3<Scalar> >
 }
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_ALIGNED_VECTOR3
@@ -9,9 +9,7 @@
 #ifndef EIGEN_ARPACKSUPPORT_MODULE_H
 #define EIGEN_ARPACKSUPPORT_MODULE_H
-#include <Eigen/Core>
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+#include "../../Eigen/Core"
 /** \defgroup ArpackSupport_Module Arpack support module
  *
@@ -22,10 +20,12 @@
  * \endcode
  */
-#include <Eigen/SparseCholesky>
+#include "../../Eigen/SparseCholesky"
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
 #include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h"
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_ARPACKSUPPORT_MODULE_H
 /* vim: set filetype=cpp et sw=2 ts=2 ai: */
@@ -28,11 +28,17 @@ namespace Eigen {
 //@{
 }
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
 #include "src/AutoDiff/AutoDiffScalar.h"
 // #include "src/AutoDiff/AutoDiffVector.h"
 #include "src/AutoDiff/AutoDiffJacobian.h"
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 namespace Eigen {
 //@}
 }
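For orientation, a minimal sketch of what the AutoDiff module touched above provides (illustrative only, not part of this commit; it assumes the usual AutoDiffScalar constructor and accessors):

#include <unsupported/Eigen/AutoDiff>
#include <iostream>

int main() {
  // One independent variable: value 2.0, a derivative vector of size 1,
  // seeded so that the derivative with respect to variable #0 is 1.
  typedef Eigen::AutoDiffScalar<Eigen::VectorXd> ADScalar;
  ADScalar x(2.0, 1, 0);
  ADScalar y = x * x + 3.0 * x;             // y = x^2 + 3x
  std::cout << y.value() << "\n";           // 10
  std::cout << y.derivatives()[0] << "\n";  // dy/dx = 2x + 3 = 7
  return 0;
}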
@@ -10,9 +10,9 @@
 #ifndef EIGEN_BVH_MODULE_H
 #define EIGEN_BVH_MODULE_H
-#include <Eigen/Core>
-#include <Eigen/Geometry>
-#include <Eigen/StdVector>
+#include "../../Eigen/Core"
+#include "../../Eigen/Geometry"
+#include "../../Eigen/StdVector"
 #include <algorithm>
 #include <queue>
@@ -19,16 +19,16 @@
 #undef isnan
 #undef isinf
 #undef isfinite
-#include <SYCL/sycl.hpp>
+#include <CL/sycl.hpp>
 #include <iostream>
 #include <map>
 #include <memory>
 #include <utility>
 #endif
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
 #include "../SpecialFunctions"
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
 #include "src/util/CXX11Meta.h"
 #include "src/util/MaxSizeVector.h"
@@ -40,6 +40,8 @@
 * \code
 * #include <Eigen/CXX11/Tensor>
 * \endcode
+*
+* Much of the documentation can be found \ref eigen_tensors "here".
 */
 #include <cmath>
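As a reminder of how the module documented above is consumed, here is an illustrative sketch (not part of this change):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 4);   // rank-3 tensor with dimensions 2x3x4
  t.setZero();
  t(1, 2, 3) = 42.0f;
  Eigen::Tensor<float, 0> s = t.sum();  // rank-0 tensor holding the sum
  std::cout << s() << "\n";             // prints 42
  return 0;
}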
@@ -80,12 +82,16 @@ typedef unsigned __int64 uint64_t;
 #endif
 #ifdef EIGEN_USE_GPU
 #include <iostream>
+#if defined(EIGEN_USE_HIP)
+#include <hip/hip_runtime.h>
+#else
 #include <cuda_runtime.h>
+#endif
 #if __cplusplus >= 201103L
 #include <atomic>
 #include <unistd.h>
 #endif
 #endif
 #include "src/Tensor/TensorMacros.h"
@@ -95,7 +101,10 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorCostModel.h"
 #include "src/Tensor/TensorDeviceDefault.h"
 #include "src/Tensor/TensorDeviceThreadPool.h"
-#include "src/Tensor/TensorDeviceCuda.h"
+#include "src/Tensor/TensorDeviceGpu.h"
+#ifndef gpu_assert
+#define gpu_assert(x)
+#endif
 #include "src/Tensor/TensorDeviceSycl.h"
 #include "src/Tensor/TensorIndexList.h"
 #include "src/Tensor/TensorDimensionList.h"
@@ -108,18 +117,19 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorGlobalFunctions.h"
 #include "src/Tensor/TensorBase.h"
+#include "src/Tensor/TensorBlock.h"
 #include "src/Tensor/TensorEvaluator.h"
 #include "src/Tensor/TensorExpr.h"
 #include "src/Tensor/TensorReduction.h"
-#include "src/Tensor/TensorReductionCuda.h"
+#include "src/Tensor/TensorReductionGpu.h"
 #include "src/Tensor/TensorArgMax.h"
 #include "src/Tensor/TensorConcatenation.h"
 #include "src/Tensor/TensorContractionMapper.h"
 #include "src/Tensor/TensorContractionBlocking.h"
 #include "src/Tensor/TensorContraction.h"
 #include "src/Tensor/TensorContractionThreadPool.h"
-#include "src/Tensor/TensorContractionCuda.h"
+#include "src/Tensor/TensorContractionGpu.h"
 #include "src/Tensor/TensorConversion.h"
 #include "src/Tensor/TensorConvolution.h"
 #include "src/Tensor/TensorFFT.h"
@@ -141,6 +151,7 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorGenerator.h"
 #include "src/Tensor/TensorAssign.h"
 #include "src/Tensor/TensorScan.h"
+#include "src/Tensor/TensorTrace.h"
 #include "src/Tensor/TensorSycl.h"
 #include "src/Tensor/TensorExecutor.h"
@@ -154,6 +165,6 @@ typedef unsigned __int64 uint64_t;
 #include "src/Tensor/TensorIO.h"
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 //#endif // EIGEN_CXX11_TENSOR_MODULE
@@ -10,9 +10,9 @@
 #ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE
 #define EIGEN_CXX11_TENSORSYMMETRY_MODULE
-#include <unsupported/Eigen/CXX11/Tensor>
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+#include "Tensor"
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
 #include "src/util/CXX11Meta.h"
@@ -33,7 +33,7 @@
 #include "src/TensorSymmetry/StaticSymmetry.h"
 #include "src/TensorSymmetry/DynamicSymmetry.h"
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE
@@ -12,7 +12,7 @@
 #include "../../../Eigen/Core"
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+#include "../../../Eigen/src/Core/util/DisableStupidWarnings.h"
 /** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module
  *
@@ -44,35 +44,31 @@
 #include <thread>
 #include <functional>
 #include <memory>
 #include "src/util/CXX11Meta.h"
 #include "src/util/MaxSizeVector.h"
 #include "src/ThreadPool/ThreadLocal.h"
+#ifndef EIGEN_THREAD_LOCAL
+// There are non-parenthesized calls to "max" in the <unordered_map> header,
+// which trigger a check in test/main.h causing compilation to fail.
+// We work around the check here by removing the check for max in
+// the case where we have to emulate thread_local.
+#ifdef max
+#undef max
+#endif
+#include <unordered_map>
+#endif
 #include "src/ThreadPool/ThreadYield.h"
 #include "src/ThreadPool/ThreadCancel.h"
 #include "src/ThreadPool/EventCount.h"
 #include "src/ThreadPool/RunQueue.h"
 #include "src/ThreadPool/ThreadPoolInterface.h"
 #include "src/ThreadPool/ThreadEnvironment.h"
-#include "src/ThreadPool/SimpleThreadPool.h"
+#include "src/ThreadPool/Barrier.h"
 #include "src/ThreadPool/NonBlockingThreadPool.h"
-// Use the more efficient NonBlockingThreadPool by default.
-namespace Eigen {
-#ifndef EIGEN_USE_SIMPLE_THREAD_POOL
-template <typename Env> using ThreadPoolTempl = NonBlockingThreadPoolTempl<Env>;
-typedef NonBlockingThreadPool ThreadPool;
-#else
-template <typename Env> using ThreadPoolTempl = SimpleThreadPoolTempl<Env>;
-typedef SimpleThreadPool ThreadPool;
-#endif
-} // namespace Eigen
 #endif
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
+#include "../../../Eigen/src/Core/util/ReenableStupidWarnings.h"
 #endif // EIGEN_CXX11_THREADPOOL_MODULE
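With SimpleThreadPool removed above, the non-blocking pool is the only implementation. A hedged sketch of driving a tensor expression on it (illustrative only, not part of the diff; assumes EIGEN_USE_THREADS and the ThreadPoolDevice constructor taking a pool pointer and a core count):

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::ThreadPool pool(4);                 // 4 worker threads
  Eigen::ThreadPoolDevice device(&pool, 4);
  Eigen::Tensor<float, 2> a(256, 256), b(256, 256), c(256, 256);
  a.setRandom();
  b.setRandom();
  c.device(device) = a + b;                  // evaluation is dispatched to the pool
  return 0;
}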
@@ -23,12 +23,12 @@ namespace Eigen {
 * The %Tensor class encompasses only dynamic-size objects so far.
 *
 * The first two template parameters are required:
-* \tparam Scalar_ \anchor tensor_tparam_scalar Numeric type, e.g. float, double, int or std::complex<float>.
+* \tparam Scalar_ Numeric type, e.g. float, double, int or `std::complex<float>`.
 * User defined scalar types are supported as well (see \ref user_defined_scalars "here").
 * \tparam NumIndices_ Number of indices (i.e. rank of the tensor)
 *
 * The remaining template parameters are optional -- in most cases you don't have to worry about them.
-* \tparam Options_ \anchor tensor_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either
+* \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either
 * \b #AutoAlign or \b #DontAlign.
 * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
 * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization.
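The template parameters described in the documentation above translate into declarations like the following (illustrative only, not part of the diff):

#include <unsupported/Eigen/CXX11/Tensor>
#include <complex>

int main() {
  // Scalar_ = double, NumIndices_ = 2, default Options_ (ColMajor, AutoAlign).
  Eigen::Tensor<double, 2> a(3, 4);
  // Explicit row-major storage order and a complex scalar type.
  Eigen::Tensor<std::complex<float>, 3, Eigen::RowMajor> b(2, 2, 2);
  a.setZero();
  b.setZero();
  return 0;
}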
@@ -42,13 +42,13 @@ namespace Eigen {
 * \endcode
 *
 * This class can be extended with the help of the plugin mechanism described on the page
-* \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
+* \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN.
 *
 * <i><b>Some notes:</b></i>
 *
 * <dl>
 * <dt><b>Relation to other parts of Eigen:</b></dt>
-* <dd>The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
+* <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
 * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
 * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
 * class does not provide any of these features and is only available as a stand-alone class that just allows for
@@ -112,7 +112,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
 #if EIGEN_HAS_VARIADIC_TEMPLATES
 template<typename... IndexTypes>
-EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
 {
 // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
 EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -398,6 +398,21 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
 internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
 }
+#if EIGEN_HAS_RVALUE_REFERENCES
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Tensor(Self&& other)
+  : Tensor()
+{
+  m_storage.swap(other.m_storage);
+}
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Tensor& operator=(Self&& other)
+{
+  m_storage.swap(other.m_storage);
+  return *this;
+}
+#endif
 EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other)
 {
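The move constructor and move assignment added above swap storage with a default-constructed tensor, so no elements are copied. An illustrative sketch of what this enables (not part of the diff):

#include <unsupported/Eigen/CXX11/Tensor>
#include <utility>

int main() {
  Eigen::Tensor<float, 2> a(1000, 1000);
  a.setRandom();
  Eigen::Tensor<float, 2> b(std::move(a));  // steals a's storage; a is left empty
  Eigen::Tensor<float, 2> c;
  c = std::move(b);                         // likewise for move assignment
  return 0;
}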
@@ -462,6 +477,18 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
 // Nothing to do: rank 0 tensors have fixed size
 }
+#ifdef EIGEN_HAS_INDEX_LIST
+template <typename FirstType, typename... OtherTypes>
+EIGEN_DEVICE_FUNC
+void resize(const Eigen::IndexList<FirstType, OtherTypes...>& dimensions) {
+  array<Index, NumIndices> dims;
+  for (int i = 0; i < NumIndices; ++i) {
+    dims[i] = static_cast<Index>(dimensions[i]);
+  }
+  resize(dims);
+}
+#endif
 /** Custom Dimension */
 #ifdef EIGEN_HAS_SFINAE
 template<typename CustomDimension,
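The new resize overload above accepts an Eigen::IndexList. A hedged sketch of calling it (illustrative only; assumes EIGEN_HAS_INDEX_LIST is defined, which requires C++11 constexpr support):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t;
  // Two compile-time dimensions, 3 and 4.
  Eigen::IndexList<Eigen::type2index<3>, Eigen::type2index<4> > dims;
  t.resize(dims);   // t is now 3 x 4
  t.setZero();
  return 0;
}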
@@ -87,6 +87,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
 IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
 PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
 BlockAccess = false,
+PreferBlockAccess = false,
 Layout = TensorEvaluator<ArgType, Device>::Layout,
 CoordAccess = false, // to be implemented
 RawAccess = false
@@ -119,6 +120,12 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
 EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {
+  return m_impl;
+}
+#endif
 protected:
 TensorEvaluator<ArgType, Device> m_impl;
 };
@@ -172,7 +179,7 @@ class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Di
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr,
 const ReduceOp& reduce_op,
-const int return_dim,
+const Index return_dim,
 const Dims& reduce_dims)
 : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {}
@@ -187,12 +194,12 @@ class TensorTupleReducerOp : public TensorBase<TensorTupleReducerOp<ReduceOp, Di
 const Dims& reduce_dims() const { return m_reduce_dims; }
 EIGEN_DEVICE_FUNC
-int return_dim() const { return m_return_dim; }
+Index return_dim() const { return m_return_dim; }
 protected:
 typename XprType::Nested m_xpr;
 const ReduceOp m_reduce_op;
-const int m_return_dim;
+const Index m_return_dim;
 const Dims m_reduce_dims;
 };
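TensorTupleReducerOp is the expression behind argmax/argmin, and its return_dim member is widened here from int to Index. An illustrative sketch of the user-facing API it backs (not part of the diff):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();
  // Reduce along dimension 1: one winning column index per row of t.
  Eigen::Tensor<Eigen::DenseIndex, 1> am = t.argmax(1);
  std::cout << am << "\n";
  return 0;
}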
@@ -214,6 +221,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
 IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
 PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/ false,
 BlockAccess = false,
+PreferBlockAccess = false,
 Layout = TensorEvaluator<const TensorReductionOp<ReduceOp, Dims, const TensorIndexTupleOp<ArgType> >, Device>::Layout,
 CoordAccess = false, // to be implemented
 RawAccess = false
@@ -222,8 +230,11 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
 : m_orig_impl(op.expression(), device),
 m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
-m_return_dim(op.return_dim()) {
+m_return_dim(op.return_dim())
+#ifdef EIGEN_USE_SYCL
+,m_device(device)
+#endif
+{
 gen_strides(m_orig_impl.dimensions(), m_strides);
 if (Layout == static_cast<int>(ColMajor)) {
 const Index total_size = internal::array_prod(m_orig_impl.dimensions());
@@ -232,7 +243,7 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
 const Index total_size = internal::array_prod(m_orig_impl.dimensions());
 m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size;
 }
-m_stride_div = m_strides[m_return_dim];
+m_stride_div = (m_return_dim >= 0) ? m_strides[m_return_dim] : 1;
 }
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
@@ -252,7 +263,16 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
 return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
 }