// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// These templates are not found via ADL. using hwy::HWY_NAMESPACE::Broadcast; #if HWY_TARGET != HWY_SCALAR using hwy::HWY_NAMESPACE::CombineShiftRightBytes; #endif using hwy::HWY_NAMESPACE::TableLookupLanes; using hwy::HWY_NAMESPACE::Vec;
// Synthesizes left/right neighbors from a vector of center pixels. class Neighbors { public: using D = HWY_CAPPED(float, 16); using V = Vec<D>;
// Returns l[i] == c[Mirror(i - 1)].
HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) { #if HWY_CAP_GE256 const D d;
HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14}; constauto indices = SetTableIndices(d, lanes); // c = PONM'LKJI return TableLookupLanes(c, indices); // ONML'KJII #elif HWY_TARGET == HWY_SCALAR return c; // Same (the first mirrored value is the last valid one) #else// 128 bit // c = LKJI #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))}; // KJII #else const D d; // TODO(deymo): Figure out if this can be optimized using a single vsri // instruction to convert LKJI to KJII.
HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2}; // KJII constauto indices = SetTableIndices(d, lanes); return TableLookupLanes(c, indices); #endif #endif
}
// Returns l[i] == c[Mirror(i - 2)].
HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) { #if HWY_CAP_GE256 const D d;
HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2, 3, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13}; constauto indices = SetTableIndices(d, lanes); // c = PONM'LKJI return TableLookupLanes(c, indices); // NMLK'JIIJ #elif HWY_TARGET == HWY_SCALAR const D d;
JXL_DEBUG_ABORT("Unsupported"); return Zero(d); #else// 128 bit // c = LKJI #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))}; // JIIJ #else const D d;
HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1}; // JIIJ constauto indices = SetTableIndices(d, lanes); return TableLookupLanes(c, indices); #endif #endif
}
// Returns l[i] == c[Mirror(i - 3)].
HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) { #if HWY_CAP_GE256 const D d;
HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12}; constauto indices = SetTableIndices(d, lanes); // c = PONM'LKJI return TableLookupLanes(c, indices); // MLKJ'IIJK #elif HWY_TARGET == HWY_SCALAR const D d;
JXL_DEBUG_ABORT("Unsipported"); return Zero(d); #else// 128 bit // c = LKJI #if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86) return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))}; // IIJK #else const D d;
HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0}; // IIJK constauto indices = SetTableIndices(d, lanes); return TableLookupLanes(c, indices); #endif #endif
}
};
// Single entry point for convolution. // "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it. template <class Strategy> class ConvolveT { static constexpr int64_t kRadius = Strategy::kRadius; using Simd = HWY_CAPPED(float, 16);
public:
  // Returns the smallest rect width that Run() accepts (Run checks
  // rect.xsize() >= MinWidth() with JXL_DASSERT).
  static size_t MinWidth() {
#if HWY_TARGET == HWY_SCALAR
    // First/Last use mirrored loads of up to +/- kRadius.
    return 2 * kRadius;
#else
    // One full vector plus the kernel radius.
    return Lanes(Simd()) + kRadius;
#endif
  }
// "Image" is ImageF or Image3F. template <class Image, class Weights> staticvoid Run(const Image& in, const Rect& rect, const Weights& weights,
ThreadPool* pool, Image* out) {
JXL_DASSERT(SameSize(rect, *out));
JXL_DASSERT(rect.xsize() >= MinWidth());
static_assert(static_cast<int64_t>(kRadius) <= 3, "Must handle [0, kRadius) and >= kRadius"); switch (rect.xsize() % Lanes(Simd())) { case 0: return RunRows<0>(in, rect, weights, pool, out); case 1: return RunRows<1>(in, rect, weights, pool, out); case 2: return RunRows<2>(in, rect, weights, pool, out); default: return RunRows<3>(in, rect, weights, pool, out);
}
}
// [Web-page footer, not part of the source] The information on this web page
// was compiled carefully and to the best of our knowledge. However, neither
// completeness, nor correctness, nor quality of the provided information is
// guaranteed.
// [Web-page footer, not part of the source] Remark:
// The colored syntax highlighting and the measurement are still experimental.