/*
 *  Copyright (c) 2021 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
// Process a block of width 4 four rows at a time. staticINLINEvoid variance_4xh_neon_dotprod(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int h,
uint32_t *sse, int *sum) {
uint32x4_t src_sum = vdupq_n_u32(0);
uint32x4_t ref_sum = vdupq_n_u32(0);
uint32x4_t sse_u32 = vdupq_n_u32(0);
int i = h; do { const uint8x16_t s = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t r = load_unaligned_u8q(ref_ptr, ref_stride);
// Process a block of width 8 two rows at a time. staticINLINEvoid variance_8xh_neon_dotprod(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int h,
uint32_t *sse, int *sum) {
uint32x4_t src_sum = vdupq_n_u32(0);
uint32x4_t ref_sum = vdupq_n_u32(0);
uint32x4_t sse_u32 = vdupq_n_u32(0);
int i = h; do { const uint8x16_t s =
vcombine_u8(vld1_u8(src_ptr), vld1_u8(src_ptr + src_stride)); const uint8x16_t r =
vcombine_u8(vld1_u8(ref_ptr), vld1_u8(ref_ptr + ref_stride));
// Process a block of width 16 one row at a time. staticINLINEvoid variance_16xh_neon_dotprod(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int h,
uint32_t *sse, int *sum) {
uint32x4_t src_sum = vdupq_n_u32(0);
uint32x4_t ref_sum = vdupq_n_u32(0);
uint32x4_t sse_u32 = vdupq_n_u32(0);
int i = h; do { const uint8x16_t s = vld1q_u8(src_ptr); const uint8x16_t r = vld1q_u8(ref_ptr);
// Process a block of any size where the width is divisible by 16. staticINLINEvoid variance_large_neon_dotprod(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int w, int h,
uint32_t *sse, int *sum) {
uint32x4_t src_sum = vdupq_n_u32(0);
uint32x4_t ref_sum = vdupq_n_u32(0);
uint32x4_t sse_u32 = vdupq_n_u32(0);
int i = h; do { int j = 0; do { const uint8x16_t s = vld1q_u8(src_ptr + j); const uint8x16_t r = vld1q_u8(ref_ptr + j);
unsignedint vpx_get4x4sse_cs_neon_dotprod(constunsignedchar *src_ptr, int src_stride, constunsignedchar *ref_ptr, int ref_stride) {
uint8x16_t s = load_unaligned_u8q(src_ptr, src_stride);
uint8x16_t r = load_unaligned_u8q(ref_ptr, ref_stride);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.