/* * Copyright (c) 2018, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the s * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Referenced by fft_avx2.c. void aom_transpose_float_sse2(constfloat *A, float *B, int n);
void aom_transpose_float_sse2(constfloat *A, float *B, int n) { for (int y = 0; y < n; y += 4) { for (int x = 0; x < n; x += 4) {
transpose4x4(A + y * n + x, B + x * n + y, n, n);
}
}
}
// Referenced by fft_avx2.c. void aom_fft_unpack_2d_output_sse2(constfloat *packed, float *output, int n);
for (int c = 1; c < n2; ++c) {
output[2 * (0 * n + c)] = packed[c];
output[2 * (0 * n + c) + 1] = packed[c + n2];
output[2 * (n2 * n + c) + 0] = packed[n2 * n + c];
output[2 * (n2 * n + c) + 1] = packed[n2 * n + c + n2];
} for (int r = 1; r < n2; ++r) {
output[2 * (r * n + 0)] = packed[r * n];
output[2 * (r * n + 0) + 1] = packed[(r + n2) * n];
output[2 * (r * n + n2) + 0] = packed[r * n + n2];
output[2 * (r * n + n2) + 1] = packed[(r + n2) * n + n2];
for (int c = 1; c < AOMMIN(n2, 4); ++c) {
output[2 * (r * n + c)] =
packed[r * n + c] - packed[(r + n2) * n + c + n2];
output[2 * (r * n + c) + 1] =
packed[(r + n2) * n + c] + packed[r * n + c + n2];
}
for (int c = 4; c < n2; c += 4) {
__m128 real1 = _mm_load_ps(packed + r * n + c);
__m128 real2 = _mm_load_ps(packed + (r + n2) * n + c + n2);
__m128 imag1 = _mm_load_ps(packed + (r + n2) * n + c);
__m128 imag2 = _mm_load_ps(packed + r * n + c + n2);
real1 = _mm_sub_ps(real1, real2);
imag1 = _mm_add_ps(imag1, imag2);
_mm_store_ps(output + 2 * (r * n + c), _mm_unpacklo_ps(real1, imag1));
_mm_store_ps(output + 2 * (r * n + c + 2), _mm_unpackhi_ps(real1, imag1));
}
int r2 = r + n2; int r3 = n - r2;
output[2 * (r2 * n + 0)] = packed[r3 * n];
output[2 * (r2 * n + 0) + 1] = -packed[(r3 + n2) * n];
output[2 * (r2 * n + n2)] = packed[r3 * n + n2];
output[2 * (r2 * n + n2) + 1] = -packed[(r3 + n2) * n + n2]; for (int c = 1; c < AOMMIN(4, n2); ++c) {
output[2 * (r2 * n + c)] =
packed[r3 * n + c] + packed[(r3 + n2) * n + c + n2];
output[2 * (r2 * n + c) + 1] =
-packed[(r3 + n2) * n + c] + packed[r3 * n + c + n2];
} for (int c = 4; c < n2; c += 4) {
__m128 real1 = _mm_load_ps(packed + r3 * n + c);
__m128 real2 = _mm_load_ps(packed + (r3 + n2) * n + c + n2);
__m128 imag1 = _mm_load_ps(packed + (r3 + n2) * n + c);
__m128 imag2 = _mm_load_ps(packed + r3 * n + c + n2);
real1 = _mm_add_ps(real1, real2);
imag1 = _mm_sub_ps(imag2, imag1);
_mm_store_ps(output + 2 * (r2 * n + c), _mm_unpacklo_ps(real1, imag1));
_mm_store_ps(output + 2 * (r2 * n + c + 2),
_mm_unpackhi_ps(real1, imag1));
}
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.