// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* Optimized col-major matrix * vector product:
 * This algorithm processes the matrix per vertical panels,
 * which are then processed horizontally per chunk of 8*PacketSize x 1 vertical segments.
 *
 * Mixing type logic: C += alpha * A * B
 *  |  A  |  B  |alpha| comments
 *  |real |cplx |cplx | no vectorization
 *  |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
 *  |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
 *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul
 *
 * The same reasoning applies for the transposed case.
 */
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{
  // Trait bundles for the full, half and quarter packet variants of this scalar pair.
  typedef gemv_traits<LhsScalar,RhsScalar> Traits;
  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketHalf> HalfTraits;
  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketQuarter> QuarterTraits;

  // NOTE(review): ResScalar is used in the signature below but is not declared in the
  // visible part of this struct -- confirm that its typedef is provided upstream.

  // Entry point of the col-major kernel; the definition is out of line.
  //   rows, cols - problem dimensions supplied by the caller
  //   lhs, rhs   - mappers for the left-hand side (A) and right-hand side (B) operands
  //   res        - pointer to the result scalars (presumably C in "C += alpha * A * B"; confirm)
  //   resIncr    - increment of the result; the out-of-line definition asserts resIncr==1
  //   alpha      - scaling factor, passed as an RhsScalar in this specialization
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
  Index rows, Index cols,
  const LhsMapper& lhs,
  const RhsMapper& rhs,
  ResScalar* res, Index resIncr,
  RhsScalar alpha);
};
// Out-of-line definition of run() for the ColMajor specialization of
// general_matrix_vector_product.
//   rows, cols - problem dimensions supplied by the caller
//   alhs       - lhs mapper; copied into the local `lhs` below
//   rhs        - rhs mapper
//   res        - pointer to the result scalars
//   resIncr    - increment of the result; only resIncr==1 is accepted (asserted below)
//   alpha      - scaling factor, turned into packets at the end of the visible part
// NOTE(review): the body is not closed in the visible part of this file and several names
// used below (ResPacketSize, ResPacketSizeHalf, ResPacketSizeQuarter, ResPacket, ...) are
// not declared in the visible code -- the remainder appears to be missing; confirm against
// the upstream file.
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
Index rows, Index cols, const LhsMapper& alhs, const RhsMapper& rhs,
ResScalar* res, Index resIncr,
RhsScalar alpha)
{
// resIncr is otherwise only read by the assertion on the next line; marking it as unused
// presumably silences a warning when that assertion is compiled out -- TODO confirm.
EIGEN_UNUSED_VARIABLE(resIncr);
// Only a result increment of 1 is supported by this kernel.
eigen_internal_assert(resIncr==1);
// The following copy tells the compiler that lhs's attributes are not modified outside this function.
// This helps GCC to generate proper code.
LhsMapper lhs(alhs);
// Thresholds of the form rows - k*PacketSize + 1 for k = 8, 4, 3, 2, 1 full packets, plus one
// half packet and one quarter packet. Presumably used as exclusive upper bounds for loops
// that handle k packets of rows per iteration -- the loops are not visible here; confirm.
const Index n8 = rows-8*ResPacketSize+1; const Index n4 = rows-4*ResPacketSize+1; const Index n3 = rows-3*ResPacketSize+1; const Index n2 = rows-2*ResPacketSize+1; const Index n1 = rows-1*ResPacketSize+1; const Index n_half = rows-1*ResPacketSizeHalf+1; const Index n_quarter = rows-1*ResPacketSizeQuarter+1;
// TODO: improve the following heuristic:
// NOTE(review): the declaration below is commented out as found -- it appears to have been
// merged into the TODO comment, and lhsStride is not declared in the visible code. Confirm
// against the upstream file before relying on block_cols:
//   const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4);
// alpha converted to full, half and quarter result packets via pset1 (presumably a packet
// with every coefficient set to alpha -- confirm against pset1's definition).
ResPacket palpha = pset1<ResPacket>(alpha);
ResPacketHalf palpha_half = pset1<ResPacketHalf>(alpha);
ResPacketQuarter palpha_quarter = pset1<ResPacketQuarter>(alpha);
/* Optimized row-major matrix * vector product:
 * This algorithm processes 4 rows at once that allows to both reduce
 * the number of load/stores of the result by a factor 4 and to reduce
 * the instruction dependency. Moreover, we know that all bands have the
 * same alignment pattern.
 *
 * Mixing type logic:
 *  - alpha is always a complex (or converted to a complex)
 *  - no vectorization
 */
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{
  // Trait bundles for the full, half and quarter packet variants of this scalar pair.
  typedef gemv_traits<LhsScalar,RhsScalar> Traits;
  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketHalf> HalfTraits;
  typedef gemv_traits<LhsScalar,RhsScalar,GEMVPacketQuarter> QuarterTraits;

  // NOTE(review): ResScalar is used in the signature below but is not declared in the
  // visible part of this struct -- confirm that its typedef is provided upstream.

  // Entry point of the row-major kernel; the definition is out of line.
  //   rows, cols - problem dimensions supplied by the caller
  //   lhs, rhs   - mappers for the left-hand side and right-hand side operands
  //   res        - pointer to the result scalars
  //   resIncr    - increment of the result
  //   alpha      - scaling factor, passed as a ResScalar in this specialization
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
  Index rows, Index cols,
  const LhsMapper& lhs,
  const RhsMapper& rhs,
  ResScalar* res, Index resIncr,
  ResScalar alpha);
};
// Out-of-line definition of run() for the RowMajor specialization of
// general_matrix_vector_product.
//   rows, cols - problem dimensions supplied by the caller
//   alhs       - lhs mapper; copied into the local `lhs` below
//   rhs        - rhs mapper
//   res        - pointer to the result scalars
//   resIncr    - increment of the result
//   alpha      - scaling factor, passed as a ResScalar
// NOTE(review): the body is not closed in the visible part of this file -- the remainder
// appears to be missing; confirm against the upstream file.
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
Index rows, Index cols, const LhsMapper& alhs, const RhsMapper& rhs,
ResScalar* res, Index resIncr,
ResScalar alpha)
{ // The following copy tells the compiler that lhs's attributes are not modified outside this function.
// This helps GCC to generate proper code.
LhsMapper lhs(alhs);
// TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
// processing 8 rows at once might be counterproductive wrt cache.
// NOTE(review): the declarations below are commented out as found -- they appear to have been
// merged into the TODO comment. Confirm against the upstream file before relying on n8/n4/n2:
//   const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7;
//   const Index n4 = rows-3;
//   const Index n2 = rows-1;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.