/* * Copyright (c) 2022 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/
// Load coeffs as 2 vectors of 4 x 32-bit ints each, take sign and abs values const int32x4_t coeff_0 = vld1q_s32(coeff_ptr); const int32x4_t coeff_1 = vld1q_s32(coeff_ptr + 4);
highbd_quantize_8_neon(coeff_0, coeff_1, zbin, round, quant, quant_shift,
&qcoeff_0, &qcoeff_1);
// Store the 32-bit qcoeffs
vst1q_s32(qcoeff_ptr, qcoeff_0);
vst1q_s32(qcoeff_ptr + 4, qcoeff_1);
// Calculate and store the dqcoeffs
dqcoeff_0 = vmulq_s32(qcoeff_0, dequant);
dqcoeff_1 = vmulq_s32(qcoeff_1, vdupq_lane_s32(vget_low_s32(dequant), 1));
// Only the first element of each vector is DC.
// High half has identical elements, but we can reconstruct it from the low
// half by duplicating the 2nd element. So we only need to pass a 4x32-bit
// vector
int32x4_t zbin = vmovl_s16(vld1_s16(mb_plane->zbin));
int32x4_t round = vmovl_s16(vld1_s16(mb_plane->round)); // Extend the quant, quant_shift vectors to ones of 32-bit elements // scale to high-half, so we can use vqdmulhq_s32
int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant)), 15);
int32x4_t quant_shift =
vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant_shift)), 15);
int32x4_t dequant = vmovl_s16(vld1_s16(dequant_ptr));
// Process first 8 values which include a dc component.
{ const uint16x8_t v_iscan = vreinterpretq_u16_s16(vld1q_s16(iscan));
// Returns a vector in which each 32-bit lane is 1 when the corresponding lane
// of |a| has its top (sign) bit set, and 0 otherwise. The lanes are viewed as
// unsigned so that the right shift by 31 is logical rather than arithmetic.
static VPX_FORCE_INLINE int32x4_t extract_sign_bit(int32x4_t a) {
  const uint32x4_t raw_bits = vreinterpretq_u32_s32(a);
  const uint32x4_t top_bit = vshrq_n_u32(raw_bits, 31);
  return vreinterpretq_s32_u32(top_bit);
}
static VPX_FORCE_INLINE void highbd_calculate_dqcoeff_and_store_32x32(
int32x4_t dqcoeff_0, int32x4_t dqcoeff_1, tran_low_t *dqcoeff_ptr) { // Add 1 if negative to round towards zero because the C uses division.
dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
// Load coeffs as 2 vectors of 4 x 32-bit ints each, take sign and abs values const int32x4_t coeff_0 = vld1q_s32(coeff_ptr); const int32x4_t coeff_1 = vld1q_s32(coeff_ptr + 4);
highbd_quantize_8_neon(coeff_0, coeff_1, zbin, round, quant, quant_shift,
&qcoeff_0, &qcoeff_1);
// Store the 32-bit qcoeffs
vst1q_s32(qcoeff_ptr, qcoeff_0);
vst1q_s32(qcoeff_ptr + 4, qcoeff_1);
// Calculate and store the dqcoeffs
dqcoeff_0 = vmulq_s32(qcoeff_0, dequant);
dqcoeff_1 = vmulq_s32(qcoeff_1, vdupq_lane_s32(vget_low_s32(dequant), 1));
// Only the first element of each vector is DC.
// High half has identical elements, but we can reconstruct it from the low
// half by duplicating the 2nd element. So we only need to pass a 4x32-bit
// vector
int32x4_t zbin = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->zbin)), 1);
int32x4_t round = vrshrq_n_s32(vmovl_s16(vld1_s16(mb_plane->round)), 1); // Extend the quant, quant_shift vectors to ones of 32-bit elements // scale to high-half, so we can use vqdmulhq_s32
int32x4_t quant = vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant)), 15);
int32x4_t quant_shift =
vshlq_n_s32(vmovl_s16(vld1_s16(mb_plane->quant_shift)), 16);
int32x4_t dequant = vmovl_s16(vld1_s16(dequant_ptr));
// Process first 8 values which include a dc component.
{ const uint16x8_t v_iscan = vreinterpretq_u16_s16(vld1q_s16(iscan));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.