// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*************************************************************************** * Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
// copy_using_evaluator_traits is based on assign_traits
// TODO distinguish between linear traversal and inner-traversals typedeftypename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType; typedeftypename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
private: enum {
DstIsRowMajor = DstFlags&RowMajorBit,
SrcIsRowMajor = SrcFlags&RowMajorBit,
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = bool(StorageOrdersAgree)
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& bool(functor_traits<AssignFunc>::PacketAccess),
MayInnerVectorize = MightVectorize
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) /* slice vectorization can be slow, so we only want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block in a fixed-size matrix
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
};
template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{ // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedeftypename Kernel::DstEvaluatorType DstEvaluatorType; typedeftypename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
};
template<typename Kernel, int Index, int Stop> struct copy_using_evaluator_innervec_CompleteUnrolling
{ // FIXME: this is not very clean, perhaps this information should be provided by the kernel? typedeftypename Kernel::DstEvaluatorType DstEvaluatorType; typedeftypename DstEvaluatorType::XprType DstXprType; typedeftypename Kernel::PacketType PacketType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment> struct copy_using_evaluator_innervec_InnerUnrolling
{ typedeftypename Kernel::PacketType PacketType;
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_); enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
}
};
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment> struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};
/*************************************************************************** * Part 3 : implementation of all cases
***************************************************************************/
// dense_assignment_loop is based on assign_impl
template<typename Kernel, int Traversal = Kernel::AssignmentTraits::Traversal, int Unrolling = Kernel::AssignmentTraits::Unrolling> struct dense_assignment_loop;
/************************ ***** Special Cases *****
************************/
// Zero-sized assignment is a no-op. template<typename Kernel, int Unrolling> struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
{
EIGEN_DEVICE_FUNC staticvoid EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
{ typedeftypename Kernel::DstEvaluatorType::XprType DstXprType;
EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
}
};
/*************************** *** Linear vectorization ***
***************************/
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling // of the non vectorizable beginning and ending parts
template <bool IsAligned = false> struct unaligned_dense_assignment_loop
{ // if IsAligned = true, then do nothing template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};
template <> struct unaligned_dense_assignment_loop<false>
{ // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. // FIXME check which version exhibits this issue #if EIGEN_COMP_MSVC template <typename Kernel> static EIGEN_DONT_INLINE void run(Kernel &kernel,
Index start,
Index end) #else template <typename Kernel>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
Index start,
Index end) #endif
{ for (Index index = start; index < end; ++index)
kernel.assignCoeff(index);
}
};
template<typename Kernel> struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ typedeftypename Kernel::Scalar Scalar; typedeftypename Kernel::PacketType PacketType; enum {
packetSize = unpacket_traits<PacketType>::size,
requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
dstAlignment = alignable ? int(requestedAlignment)
: int(Kernel::AssignmentTraits::DstAlignment)
}; const Scalar *dst_ptr = kernel.dstDataPtr(); if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{ // the pointer is not aligned-on scalar, so alignment is not possible return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
} const Index packetAlignedMask = packetSize - 1; const Index innerSize = kernel.innerSize(); const Index outerSize = kernel.outerSize(); const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
for(Index outer = 0; outer < outerSize; ++outer)
{ const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); // do the non-vectorizable part of the assignment for(Index inner = 0; inner<alignedStart ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
// do the vectorizable part of the assignment for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
// do the non-vectorizable part of the assignment for(Index inner = alignedEnd; inner<innerSize ; ++inner)
kernel.assignCoeffByOuterInner(outer, inner);
/*************************************************************************** * Part 4 : Generic dense assignment kernel
***************************************************************************/
// This class generalize the assignment of a coefficient (or packet) from one dense evaluator // to another dense writable evaluator. // It is parametrized by the two evaluators, and the actual assignment functor. // This abstraction level permits to keep the evaluation loops as simple and as generic as possible. // One can customize the assignment using this generic dense_assignment_kernel with different // functors, or by completely overloading it, by-passing a functor. template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized> class generic_dense_assignment_kernel
{ protected: typedeftypename DstEvaluatorTypeT::XprType DstXprType; typedeftypename SrcEvaluatorTypeT::XprType SrcXprType; public:
/// Assign src(row,col) to dst(row,col) through the assignment functor.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
protected:
DstEvaluatorType& m_dst; const SrcEvaluatorType& m_src; const Functor &m_functor; // TODO find a way to avoid the needs of the original expression
DstXprType& m_dstExpr;
};
// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the // PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used // when computing the product.
/*************************************************************************** * Part 5 : Entry point for dense rectangular assignment
***************************************************************************/
// NOTE To properly handle A = (A*A.transpose())/s with A rectangular, // we need to resize the destination after the source evaluator has been created.
resize_if_allowed(dst, src, func);
/*************************************************************************** * Part 6 : Generic assignment
***************************************************************************/
// Based on the respective shapes of the destination and source, // the class AssignmentKind determine the kind of assignment mechanism. // AssignmentKind must define a Kind typedef. template<typename DstShape, typename SrcShape> struct AssignmentKind;
// Assignment kind defined in this file: struct Dense2Dense {}; struct EigenBase2EigenBase {};
// This is the main assignment class template< typename DstXprType, typename SrcXprType, typename Functor, typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind, typename EnableIf = void> struct Assignment;
// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. // So this intermediate function removes everything related to "assume-aliasing" such that Assignment // does not has to bother about these annoying details.
// by-pass "assume-aliasing" // When there is no aliasing, we require that 'dst' has been properly resized template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
call_assignment_no_alias(dst.expression(), src, func);
}
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{ // TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
// Generic Dense to Dense assignment // Note that the last template argument "Weak" is needed to make it possible to perform // both partial specialization+SFINAE without ambiguous specialization template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
{ #ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src); #endif
call_dense_assignment_loop(dst, src, func);
}
};
// Generic assignment through evalTo. // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. // Note that the last template argument "Weak" is needed to make it possible to perform // both partial specialization+SFINAE without ambiguous specialization template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// NOTE The following two functions are templated to avoid their instantiation if not needed // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. template<typename SrcScalarType>
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
{
Index dstRows = src.rows();
Index dstCols = src.cols(); if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.