// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
for (Index i = 0; i < m_size; ++i) {
buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
}
for (size_t i = 0; i < m_fft.size(); ++i) {
Index dim = m_fft[i];
eigen_assert(dim >= 0 && dim < NumDims);
Index line_len = m_dimensions[dim];
eigen_assert(line_len >= 1);
ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); constbool is_power_of_two = isPowerOfTwo(line_len); const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); if (!is_power_of_two) { // Compute twiddle factors // t_n = exp(sqrt(-1) * pi * n^2 / line_len) // for n = 0, 1,..., line_len-1. // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
// The recurrence is correct in exact arithmetic, but causes // numerical issues for large transforms, especially in // single-precision floating point. // // pos_j_base_powered[0] = ComplexScalar(1, 0); // if (line_len > 1) { // const ComplexScalar pos_j_base = ComplexScalar( // numext::cos(M_PI / line_len), numext::sin(M_PI / line_len)); // pos_j_base_powered[1] = pos_j_base; // if (line_len > 2) { // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; // for (int i = 2; i < line_len + 1; ++i) { // pos_j_base_powered[i] = pos_j_base_powered[i - 1] * // pos_j_base_powered[i - 1] / // pos_j_base_powered[i - 2] * // pos_j_base_sq; // } // } // } // TODO(rmlarsen): Find a way to use Eigen's vectorized sin // and cosine functions here. for (int j = 0; j < line_len + 1; ++j) { double arg = ((EIGEN_PI * j) * j) / line_len;
std::complex<double> tmp(numext::cos(arg), numext::sin(arg));
pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp);
}
}
for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
// get data into line_buf const Index stride = m_strides[dim]; if (stride == 1) {
m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
} else {
Index offset = base_offset; for (int j = 0; j < line_len; ++j, offset += stride) {
line_buf[j] = buf[offset];
}
}
// process the line if (is_power_of_two) {
processDataLineCooleyTukey(line_buf, line_len, log_len);
} else {
processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
}
// The composite number for padding, used in Bluestein's FFT algorithm
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
Index i = 2; while (i < 2 * n - 1) i *= 2; return i;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
Index log2m = 0; while (m >>= 1) log2m++; return log2m;
}
// Call Cooley Tukey algorithm directly, data length must be power of 2
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
eigen_assert(isPowerOfTwo(line_len));
scramble_FFT(line_buf, line_len);
compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
}
// Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
Index n = line_len;
Index m = good_composite;
ComplexScalar* data = line_buf;
for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) {
a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
} else {
a[i] = data[i] * pos_j_base_powered[i];
}
} for (Index i = n; i < m; ++i) {
a[i] = ComplexScalar(0, 0);
}
for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) {
b[i] = pos_j_base_powered[i];
} else {
b[i] = numext::conj(pos_j_base_powered[i]);
}
} for (Index i = n; i < m - n; ++i) {
b[i] = ComplexScalar(0, 0);
} for (Index i = m - n; i < m; ++i) { if(FFTDir == FFT_FORWARD) {
b[i] = pos_j_base_powered[m-i];
} else {
b[i] = numext::conj(pos_j_base_powered[m-i]);
}
}
scramble_FFT(a, m);
compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
scramble_FFT(b, m);
compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
for (Index i = 0; i < m; ++i) {
a[i] *= b[i];
}
scramble_FFT(a, m);
compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
//Do the scaling after ifft for (Index i = 0; i < m; ++i) {
a[i] /= m;
}
for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) {
data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
} else {
data[i] = a[i] * pos_j_base_powered[i];
}
}
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE staticvoid scramble_FFT(ComplexScalar* data, Index n) {
eigen_assert(isPowerOfTwo(n));
Index j = 1; for (Index i = 1; i < n; ++i){ if (j > i) {
std::swap(data[j-1], data[i-1]);
}
Index m = n >> 1; while (m >= 2 && j > m) {
j -= m;
m >>= 1;
}
j += m;
}
}
template <int Dir>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
ComplexScalar* data, Index n, Index n_power_of_2) {
eigen_assert(isPowerOfTwo(n)); if (n > 8) {
compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
butterfly_1D_merge<Dir>(data, n, n_power_of_2);
} elseif (n == 8) {
butterfly_8<Dir>(data);
} elseif (n == 4) {
butterfly_4<Dir>(data);
} elseif (n == 2) {
butterfly_2<Dir>(data);
}
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
Index result = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { for (int i = NumDims - 1; i > omitted_dim; --i) { const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; const Index idx = index / partial_m_stride;
index -= idx * partial_m_stride;
result += idx * m_strides[i];
}
result += index;
} else { for (Index i = 0; i < omitted_dim; ++i) { const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; const Index idx = index / partial_m_stride;
index -= idx * partial_m_stride;
result += idx * m_strides[i];
}
result += index;
} // Value of index_coords[omitted_dim] is not determined to this step return result;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
Index result = base + offset * m_strides[omitted_dim] ; return result;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.