/* * Copyright (c) 2023, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Accumulates masked_sad_16x1_neon() over `height` rows and returns
// horizontal_add_u16x8() of the resulting 16-bit accumulator.
//
// src8, a8 and b8 are converted with CONVERT_TO_SHORTPTR() and then stepped as
// uint16_t pointers, so src_stride, a_stride and b_stride are counted in
// uint16_t elements. m is stepped as a uint8_t pointer, so m_stride is counted
// in bytes.
//
// height must be in the range [1, 8] (see the asserts below).
static inline unsigned int masked_sad_16xh_small_neon(
    const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
    const uint8_t *b8, int b_stride, const uint8_t *m, int m_stride,
    int height) {
  // For 12-bit data, we can only accumulate up to 128 elements in the
  // uint16x8_t type sad accumulator, so we can only process up to 8 rows
  // before we have to accumulate into 32-bit elements.
  assert(height <= 8);
  // The do/while loop below always processes at least one row and only stops
  // when the decremented counter reaches exactly zero, so a non-positive
  // height would walk far outside the buffers.
  assert(height >= 1);

  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  uint16x8_t sad = vdupq_n_u16(0);

  do {
    sad = masked_sad_16x1_neon(sad, src, a, b, m);

    // Advance every input to the start of its next row.
    src += src_stride;
    a += a_stride;
    b += b_stride;
    m += m_stride;
  } while (--height != 0);

  return horizontal_add_u16x8(sad);
}
// Accumulates masked_sad_8x1_neon() over `height` rows and returns
// horizontal_add_u16x8() of the resulting 16-bit accumulator.
//
// src8, a8 and b8 are converted with CONVERT_TO_SHORTPTR() and then stepped as
// uint16_t pointers, so src_stride, a_stride and b_stride are counted in
// uint16_t elements. m is stepped as a uint8_t pointer, so m_stride is counted
// in bytes.
//
// height must be in the range [1, 16] (see the asserts below).
static inline unsigned int masked_sad_8xh_small_neon(
    const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
    const uint8_t *b8, int b_stride, const uint8_t *m, int m_stride,
    int height) {
  // For 12-bit data, we can only accumulate up to 128 elements in the
  // uint16x8_t type sad accumulator, so we can only process up to 16 rows
  // before we have to accumulate into 32-bit elements.
  assert(height <= 16);
  // The do/while loop below always processes at least one row and only stops
  // when the decremented counter reaches exactly zero, so a non-positive
  // height would walk far outside the buffers.
  assert(height >= 1);

  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  uint16x8_t sad = vdupq_n_u16(0);

  do {
    sad = masked_sad_8x1_neon(sad, src, a, b, m);

    // Advance every input to the start of its next row.
    src += src_stride;
    a += a_stride;
    b += b_stride;
    m += m_stride;
  } while (--height != 0);

  return horizontal_add_u16x8(sad);
}
staticinlineunsignedint masked_sad_4xh_small_neon( const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, const uint8_t *m, int m_stride, int height) { // For 12-bit data, we can only accumulate up to 64 elements in the // uint16x4_t type sad accumulator, so we can only process up to 16 rows // before we have to accumulate into 32-bit elements.
assert(height <= 16); const uint16_t *src = CONVERT_TO_SHORTPTR(src8); const uint16_t *a = CONVERT_TO_SHORTPTR(a8); const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.