Impressum SIMD_avx2.cpp
Interaktion und PortierbarkeitC
/* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Restricting to x86_64 simplifies things, and we're not particularly
// worried about slightly degraded performance on 32-bit processors which
// support AVX2, as this should be quite a minority.
#if defined(MOZILLA_MAY_SUPPORT_AVX2) && defined(__x86_64__)
# include <cstring> # include <immintrin.h> # include <stdint.h> # include <type_traits>
// See the comment in SIMD.cpp over Load32BitsIntoXMM. This is just adapted // from that workaround. Testing this, it also yields the correct instructions // across all tested compilers.
// Reads the 8 bytes stored at address `ptr` and returns them in the low 64
// bits of a 128-bit vector; _mm_cvtsi64_si128 zeroes the upper 64 bits.
//
// The bytes are first copied into a local int64_t with memcpy rather than
// being read through a casted pointer, so the caller's address is only ever
// touched as raw bytes.
//
// ptr: address of the first of the 8 bytes to load, passed as an integer.
__m128i Load64BitsIntoXMM(uintptr_t ptr) {
  int64_t tmp;
  memcpy(&tmp, reinterpret_cast<const void*>(ptr), sizeof(tmp));
  return _mm_cvtsi64_si128(tmp);
}
size_t numBytes = length * sizeof(TValue);
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
uintptr_t end = cur + numBytes;
if (numBytes < 8 || (sizeof(TValue) >= 4 && numBytes < 32)) { while (cur < end) { if (GetAs<TValue>(cur) == value) { returnreinterpret_cast<const TValue*>(cur);
}
cur += sizeof(TValue);
} return nullptr;
}
if constexpr (sizeof(TValue) < 4) { if (numBytes < 32) {
__m128i needle_narrow; if (sizeof(TValue) == 1) {
needle_narrow = _mm_set1_epi8(value);
} else {
needle_narrow = _mm_set1_epi16(value);
}
uintptr_t a = cur;
uintptr_t b = cur + ((numBytes & 16) >> 1);
uintptr_t c = end - 8 - ((numBytes & 16) >> 1);
uintptr_t d = end - 8; return Check4x8Bytes<TValue>(needle_narrow, a, b, c, d);
}
}
if (numBytes < 128) { // NOTE: here and below, we have some bit fiddling which could look a // little weird. The important thing to note though is it's just a trick // for getting the number 32 if numBytes is greater than or equal to 64, // and 0 otherwise. This lets us fully cover the range without any // branching for the case where numBytes is in [32,64), and [64,128). We get // four ranges from this - if numbytes > 64, we get: // [0,32), [32,64], [end - 64), [end - 32) // and if numbytes < 64, we get // [0,32), [0,32), [end - 32), [end - 32)
uintptr_t a = cur;
uintptr_t b = cur + ((numBytes & 64) >> 1);
uintptr_t c = end - 32 - ((numBytes & 64) >> 1);
uintptr_t d = end - 32; return Check4x32Bytes<TValue>(needle, a, b, c, d);
}
// Get the initial unaligned load out of the way. This will overlap with the // aligned stuff below, but the overlapped part should effectively be free // (relative to a mispredict from doing a byte-by-byte loop).
__m256i haystack = _mm256_loadu_si256(Cast256(cur));
__m256i cmp = CmpEq256<TValue>(needle, haystack); int cmpMask = _mm256_movemask_epi8(cmp); if (cmpMask) { returnreinterpret_cast<const TValue*>(cur + __builtin_ctz(cmpMask));
}
// Now we're working with aligned memory. Hooray! \o/
cur = AlignUp32(cur);
while (cur < tailStartPtr) {
uintptr_t a = cur;
uintptr_t b = cur + 32;
uintptr_t c = cur + 64;
uintptr_t d = cur + 96; const TValue* result = Check4x32Bytes<TValue>(needle, a, b, c, d); if (result) { return result;
}
cur += 128;
}
uintptr_t a = tailStartPtr;
uintptr_t b = tailStartPtr + 32;
uintptr_t c = tailStartPtr + 64;
uintptr_t d = tailEndPtr; return Check4x32Bytes<TValue>(needle, a, b, c, d);
}
constchar* SIMD::memchr8AVX2(constchar* ptr, char value, size_t length) {
MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
}
// Stub definition of the 16-bit AVX2 search entry point. No search is
// performed and no argument is read; the body only fails a release assertion
// reporting that AVX2 is not supported in this binary.
const char16_t*
SIMD::memchr16AVX2(const char16_t* ptr, char16_t value, size_t length) {
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
}
// Stub definition of the 32-bit AVX2 search entry point. No search is
// performed and no argument is read; the body only fails a release assertion
// reporting that AVX2 is not supported in this binary.
const uint32_t*
SIMD::memchr32AVX2(const uint32_t* ptr, uint32_t value, size_t length) {
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
}
// Stub definition of the 64-bit AVX2 search entry point. No search is
// performed and no argument is read; the body only fails a release assertion
// reporting that AVX2 is not supported in this binary.
const uint64_t*
SIMD::memchr64AVX2(const uint64_t* ptr, uint64_t value, size_t length) {
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
}
} // namespace mozilla
#endif
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert. 0.15 Bemerkung:
Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.