// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* Optimized triangular matrix * matrix (_TRMM++) product built on top of * the general matrix matrix product.
*/ template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResStorageOrder, int ResInnerStride, int Version = Specialized> struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
{ static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride,
Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
(!LhsIsTriangular),
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateRhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor, ResInnerStride>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
}
};
// implements col-major += alpha * op(triangular) * op(general) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
{
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ // strip zeros
Index diagSize = (std::min)(_rows,_depth);
Index rows = IsLower ? _rows : diagSize;
Index depth = IsLower ? diagSize : _depth;
Index cols = _cols;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction // The small panel size must not be larger than blocking size. // Usually this should never be the case because SmallPanelWidth^2 is very small // compared to L2 cache size, but let's be safe:
Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
// To work around an "error: member reference base type 'Matrix<...> // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is // not a structure or union" compilation error in nvcc (tested V8.0.61), // create a dummy internal::constructor_without_unaligned_array_assert // object to pass to the Matrix constructor.
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero(); else
triangularBuffer.diagonal().setOnes();
// align blocks with the end of the triangular part for trapezoidal lhs if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
{
actual_kc = rows-k2;
k2 = k2+actual_kc-kc;
}
// the selected lhs's panel has to be split in three different parts: // 1 - the part which is zero => skip it // 2 - the diagonal block => special kernel // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
// the block diagonal, if any: if(IsLower || actual_k2<rows)
{ // for each small vertical panels of lhs for (Index k1=0; k1<actual_kc; k1+=panelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1;
Index blockBOffset = k1;
// => GEBP with the micro triangular block // The trick is to pack this micro block while filling the opposite triangular part with zeros. // To this end we do an extra triangular copy to a small temporary buffer for (Index k=0;k<actualPanelWidth;++k)
{ if (SetDiag)
triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k); for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
}
pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar); // strip zeros
Index diagSize = (std::min)(_cols,_depth);
Index rows = _rows;
Index depth = IsLower ? _depth : diagSize;
Index cols = IsLower ? diagSize : _cols;
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
// align blocks with the end of the triangular part for trapezoidal rhs if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
{
actual_kc = cols-k2;
k2 = actual_k2 + actual_kc - kc;
}
// remaining size
Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2; // size of the triangular part
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
// pack the triangular part of the rhs padding the unrolled blocks with zeros if(ts>0)
{ for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
Index actual_j2 = actual_k2 + j2;
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; // general part
pack_rhs_panel(blockB+j2*actual_kc,
rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
panelLength, actualPanelWidth,
actual_kc, panelOffset);
// append the triangular part via a temporary buffer for (Index j=0;j<actualPanelWidth;++j)
{ if (SetDiag)
triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j); for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
}
// Apply correction if the diagonal is unit and a scalar factor was nested: if ((Mode&UnitDiag)==UnitDiag)
{ if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
} elseif ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
{
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
}
}
}
};
} // end namespace internal
} // end namespace Eigen
#endif// EIGEN_TRIANGULAR_MATRIX_MATRIX_H
¤ Dauer der Verarbeitung: 0.14 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.