/*
 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
#define AOM_AV1_COMMON_X86_AV1_TXFM_SSE2_H_
/*
 * Loads out_size rows of four 16-bit values each.
 *
 * Row i is read as 64 bits from in + i * stride (stride is counted in
 * int16_t elements) and placed in the low half of out[i]; _mm_loadl_epi64
 * zeroes the high half of the register.
 */
static inline void load_buffer_16bit_to_16bit_w4(const int16_t *const in,
                                                 const int stride,
                                                 __m128i *const out,
                                                 const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[i] = _mm_loadl_epi64((const __m128i *)(in + i * stride));
  }
}
/*
 * Same load as the non-flipped w4 variant, but with the row order reversed:
 * the 64 bits read from in + i * stride end up in out[out_size - 1 - i].
 * The high half of every output register is zeroed by _mm_loadl_epi64.
 */
static inline void load_buffer_16bit_to_16bit_w4_flip(const int16_t *const in,
                                                      const int stride,
                                                      __m128i *const out,
                                                      const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[out_size - i - 1] =
        _mm_loadl_epi64((const __m128i *)(in + i * stride));
  }
}
/*
 * Fills out[0 .. out_size - 1] by calling load_16bit_to_16bit() once per row.
 * Row i starts at in + i * stride, with stride counted in int16_t elements.
 */
static inline void load_buffer_16bit_to_16bit(const int16_t *in, int stride,
                                              __m128i *out, int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[i] = load_16bit_to_16bit(in + i * stride);
  }
}
/*
 * Row-reversed counterpart of load_buffer_16bit_to_16bit(): the register
 * produced by load_16bit_to_16bit(in + i * stride) is written to
 * out[out_size - 1 - i].
 */
static inline void load_buffer_16bit_to_16bit_flip(const int16_t *in,
                                                   int stride, __m128i *out,
                                                   int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[out_size - i - 1] = load_16bit_to_16bit(in + i * stride);
  }
}
/*
 * Fills out[0 .. out_size - 1] by calling load_32bit_to_16bit() once per row.
 * Row i starts at in + i * stride, with stride counted in int32_t elements.
 */
static inline void load_buffer_32bit_to_16bit(const int32_t *in, int stride,
                                              __m128i *out, int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[i] = load_32bit_to_16bit(in + i * stride);
  }
}
/*
 * Fills out[0 .. out_size - 1] by calling load_32bit_to_16bit_w4() once per
 * row. Row i starts at in + i * stride, with stride counted in int32_t
 * elements.
 */
static inline void load_buffer_32bit_to_16bit_w4(const int32_t *in, int stride,
                                                 __m128i *out, int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[i] = load_32bit_to_16bit_w4(in + i * stride);
  }
}
/*
 * Row-reversed counterpart of load_buffer_32bit_to_16bit(): the register
 * produced by load_32bit_to_16bit(in + i * stride) is written to
 * out[out_size - 1 - i].
 */
static inline void load_buffer_32bit_to_16bit_flip(const int32_t *in,
                                                   int stride, __m128i *out,
                                                   int out_size) {
  for (int i = 0; i < out_size; ++i) {
    out[out_size - i - 1] = load_32bit_to_16bit(in + i * stride);
  }
}
/*
 * Writes out_size rows by passing in[i] to store_16bit_to_32bit_w4() with
 * destination out + i * stride (stride counted in int32_t elements).
 */
static inline void store_buffer_16bit_to_32bit_w4(const __m128i *const in,
                                                  int32_t *const out,
                                                  const int stride,
                                                  const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    store_16bit_to_32bit_w4(in[i], out + i * stride);
  }
}
/*
 * Writes out_size rows by passing in[i] to store_16bit_to_32bit() with
 * destination out + i * stride (stride counted in int32_t elements).
 */
static inline void store_buffer_16bit_to_32bit_w8(const __m128i *const in,
                                                  int32_t *const out,
                                                  const int stride,
                                                  const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    store_16bit_to_32bit(in[i], out + i * stride);
  }
}
/*
 * Writes out_size rows by passing in[i] to store_rect_16bit_to_32bit_w4()
 * with destination out + i * stride (stride counted in int32_t elements).
 */
static inline void store_rect_buffer_16bit_to_32bit_w4(
    const __m128i *const in, int32_t *const out, const int stride,
    const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    store_rect_16bit_to_32bit_w4(in[i], out + i * stride);
  }
}
/*
 * Writes out_size rows by passing in[i] to store_rect_16bit_to_32bit() with
 * destination out + i * stride (stride counted in int32_t elements).
 */
static inline void store_rect_buffer_16bit_to_32bit_w8(
    const __m128i *const in, int32_t *const out, const int stride,
    const int out_size) {
  for (int i = 0; i < out_size; ++i) {
    store_rect_16bit_to_32bit(in[i], out + i * stride);
  }
}
/*
 * Stores eight 128-bit registers as eight rows of eight 16-bit values.
 *
 * Row i goes to out + i * stride (stride counted in uint16_t elements).
 * _mm_store_si128 is the aligned store, so every row address must be
 * 16-byte aligned.
 */
static inline void store_buffer_16bit_to_16bit_8x8(const __m128i *in,
                                                   uint16_t *out,
                                                   const int stride) {
  for (int i = 0; i < 8; ++i) {
    _mm_store_si128((__m128i *)(out + i * stride), in[i]);
  }
}
/*
 * Shifts every 16-bit lane of in[0 .. size - 1] in place.
 *
 *   bit < 0 : rounding right shift by -bit. The rounding term
 *             1 << (-bit - 1) is added with signed saturation, then the
 *             lanes are shifted right arithmetically.
 *   bit > 0 : left shift by bit.
 *   bit == 0: the buffer is left untouched.
 */
static inline void round_shift_16bit(__m128i *in, int size, int bit) {
  if (bit < 0) {
    bit = -bit;
    __m128i rounding = _mm_set1_epi16(1 << (bit - 1));
    for (int i = 0; i < size; ++i) {
      /* Saturating add keeps the rounding term from wrapping near INT16_MAX. */
      in[i] = _mm_adds_epi16(in[i], rounding);
      in[i] = _mm_srai_epi16(in[i], bit);
    }
  } else if (bit > 0) {
    for (int i = 0; i < size; ++i) {
      in[i] = _mm_slli_epi16(in[i], bit);
    }
  }
}
/*
 * Copies size registers from in to out in reverse order:
 * out[size - 1 - i] = in[i].
 *
 * Note: in and out should be distinct buffers. With in == out the forward
 * copy overwrites entries before they have been read.
 */
static inline void flip_buf_sse2(__m128i *in, __m128i *out, int size) {
  for (int i = 0; i < size; ++i) {
    out[size - i - 1] = in[i];
  }
}
/*
 * Prototypes for the av1_lowbd_fwd_txfm2d_<W>x<H>_sse2 routines. They are
 * only declared here; none is defined in this part of the file. All share
 * one signature: a const int16_t input pointer, an int32_t output pointer,
 * a stride, a TX_TYPE selector and a bd value.
 */
void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output,
                                   int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output,
                                   int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output,
                                   int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output,
                                   int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output,
                                    int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output,
                                     int stride, TX_TYPE tx_type, int bd);
/*
 * The information on this website was carefully compiled to the best of our
 * knowledge. However, neither the completeness, nor the correctness, nor the
 * quality of the information provided is guaranteed.
 *
 * Note: The colored syntax highlighting and the measurement are still
 * experimental.
 */