// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2001 Intel Corporation // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // // The algorithm below is a reimplementation of former \src\LU\Inverse_SSE.h using PacketMath. // inv(M) = M#/|M|, where inv(M), M# and |M| denote the inverse of M, // adjugate of M and determinant of M respectively. M# is computed block-wise // using specific formulae. For proof, see: // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html // Variable names are adopted from \src\LU\Inverse_SSE.h. // // The SSE code for the 4x4 float and double matrix inverse in former (deprecated) \src\LU\Inverse_SSE.h // comes from the following Intel's library: // http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/ // // Here is the respective copyright and license statement: // // Copyright (c) 2001 Intel Corporation. // // Permition is granted to use, copy, distribute and prepare derivative works // of this library for any purpose and without fee, provided, that the above // copyright notice and this statement appear in all copies. // Intel makes no representations about the suitability of this software for // any purpose, and specifically disclaims all warranties. // See LEGAL.TXT for all the legal information. // // TODO: Unify implementations of different data types (i.e. float and double). #ifndef EIGEN_INVERSE_SIZE_4_H #define EIGEN_INVERSE_SIZE_4_H
// Four 2x2 sub-matrices of the input matrix
// input = [[A, B],
//          [C, D]]
// Packets holding the four 2x2 blocks of the 4x4 input, one block per packet.
// _L1.._L4 are defined before this excerpt — presumably the four packets loaded
// from the input matrix; confirm against the enclosing function.
Packet4f A, B, C, D;
// The blocks are gathered from _L1.._L4 in one of two ways, selected by
// StorageOrdersMatch: the unpacklo/unpackhi pairing when it is false, the
// movelh/movehl pairing otherwise.
// NOTE(review): the exact lane order produced by each vec4f_* helper is not
// visible here — check their definitions before changing either branch.
if (!StorageOrdersMatch)
{
A = vec4f_unpacklo(_L1, _L2);
B = vec4f_unpacklo(_L3, _L4);
C = vec4f_unpackhi(_L1, _L2);
D = vec4f_unpackhi(_L3, _L4);
} else
{
A = vec4f_movelh(_L1, _L2);
B = vec4f_movehl(_L2, _L1);
C = vec4f_movelh(_L3, _L4);
D = vec4f_movehl(_L4, _L3);
}
// Intermediate 2x2 products, each stored in a single packet.
Packet4f AB, DC;
// AB = A# * B, where A# denotes the adjugate of A, and * denotes matrix product.
// Evaluated in two steps: the first lane-wise product term, then the second
// product term (built from swizzled A and swizzled B) is subtracted from it.
AB = pmul(vec4f_swizzle2(A, A, 3, 3, 0, 0), B);
AB = psub(AB, pmul(vec4f_swizzle2(A, A, 1, 1, 2, 2), vec4f_swizzle2(B, B, 2, 3, 0, 1)));
// DC = D# * C, computed with the same two-step pattern as AB (D in place of A,
// C in place of B).
DC = pmul(vec4f_swizzle2(D, D, 3, 3, 0, 0), C);
DC = psub(DC, pmul(vec4f_swizzle2(D, D, 1, 1, 2, 2), vec4f_swizzle2(C, C, 2, 3, 0, 1)));
// Determinants of the sub-matrices.
// Each one is formed in two steps: multiply the block lane-wise by a swizzled
// copy of itself, then subtract vec4f_movehl(x, x) from that product.
// NOTE(review): presumably the determinant ends up in the low lane(s) of each
// packet — confirm against the vec4f_movehl definition.
// NOTE(review): dD is declared here but not assigned anywhere in the visible
// lines; its computation presumably follows outside this excerpt — confirm.
Packet4f dA, dB, dC, dD;
dA = pmul(vec4f_swizzle2(A, A, 3, 3, 1, 1), A);
dA = psub(dA, vec4f_movehl(dA, dA));
dB = pmul(vec4f_swizzle2(B, B, 3, 3, 1, 1), B);
dB = psub(dB, vec4f_movehl(dB, dB));
dC = pmul(vec4f_swizzle2(C, C, 3, 3, 1, 1), C);
dC = psub(dC, vec4f_movehl(dC, dC));
// Four 2x2 sub-matrices of the input matrix, each is further divided into upper and lower
// row e.g. A1, upper row of A, A2, lower row of A
// input = [[A, B],  =  [[[A1, [B1,
//          [C, D]]        A2], B2]],
//                       [[C1, [D1,
//                         C2], D2]]]
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.