/* * Copyright (c) 2022 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. *
*/
/* * Copyright (c) 2021 Loongson Technology Corporation Limited * All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. * * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn> * Xiwei Gu <guxiwei-hf@loongson.cn> * Lu Wang <wanglu@loongson.cn> * * This file is a header file for loongarch builtin extension. *
*/
#ifdef __loongarch_sx
#include <lsxintrin.h>
/*
 * =============================================================================
 * Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Signed byte elements from in_h are multiplied by
 *               signed byte elements from in_l, and then added adjacent to
 *               each other to get a result twice the size of input. Then
 *               the results are added to signed half-word elements from in_c.
 * Example     : out = __lsx_vdp2add_h_b(in_c, in_h, in_l)
 *        in_c : 1,2,3,4, 1,2,3,4
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 23,40,41,26, 23,40,41,26
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, __m128i in_h,
                                        __m128i in_l) {
  __m128i out;

  /* Accumulate the even-indexed products onto in_c, then the odd-indexed
   * products onto that partial sum. */
  out = __lsx_vmaddwev_h_b(in_c, in_h, in_l);
  out = __lsx_vmaddwod_h_b(out, in_h, in_l);
  return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * unsigned byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * The results are added to signed half-word elements from in_c. * Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l) * in_c : 1,2,3,4, 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 23,40,41,26, 23,40,41,26 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_h_bu(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_h_bu(in_c, in_h, in_l);
out = __lsx_vmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * The results are added to signed half-word elements from in_c. * Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l) * in_c : 1,1,1,1, 1,1,1,1 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8 * out : -4,-24,-60,-112, 6,26,62,114 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l);
out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of half-word vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - __m128i * Details : Signed half-word elements from in_h are multiplied by * signed half-word elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Then the results are added to signed word elements from in_c. * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) * in_c : 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1 * out : 23,40,41,26 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_w_h(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_w_h(in_c, in_h, in_l);
out = __lsx_vmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_b(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_b(in_h, in_l);
out = __lsx_vmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * unsigned byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_bu(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_bu(in_h, in_l);
out = __lsx_vmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_bu_b(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1 * out : 22,38,38,22, 22,38,38,6 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_bu_b(in_h, in_l);
out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_w_h(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1 * out : 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_w_h(in_h, in_l);
out = __lsx_vmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - double * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_d_w(in_h, in_l) * in_h : 1,2,3,4 * in_l : 8,7,6,5 * out : 22,38 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_d_w(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_d_w(in_h, in_l);
out = __lsx_vmaddwod_d_w(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Clip all halfword elements of input vector between min & max * out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) : * (_in)) * Arguments : Inputs - _in (input vector) * - min (min threshold) * - max (max threshold) * Outputs - out (output vector with clipped elements) * Return Type - signed halfword * Example : out = __lsx_vclip_h(_in) * _in : -8,2,280,249, -8,255,280,249 * min : 1,1,1,1, 1,1,1,1 * max : 9,9,9,9, 9,9,9,9 * out : 1,2,9,9, 1,9,9,9 * =============================================================================
*/ staticinline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) {
__m128i out;
out = __lsx_vmax_h(min, _in);
out = __lsx_vmin_h(max, out); return out;
}
/* * ============================================================================= * Description : Set each element of vector between 0 and 255 * Arguments : Inputs - _in * Outputs - out * Return Type - halfword * Details : Signed byte elements from _in are clamped between 0 and 255. * Example : out = __lsx_vclip255_h(_in) * _in : -8,255,280,249, -8,255,280,249 * out : 0,255,255,249, 0,255,255,249 * =============================================================================
*/ staticinline __m128i __lsx_vclip255_h(__m128i _in) {
__m128i out;
out = __lsx_vmaxi_h(_in, 0);
out = __lsx_vsat_hu(out, 7); return out;
}
/* * ============================================================================= * Description : Set each element of vector between 0 and 255 * Arguments : Inputs - _in * Outputs - out * Return Type - word * Details : Signed byte elements from _in are clamped between 0 and 255. * Example : out = __lsx_vclip255_w(_in) * _in : -8,255,280,249 * out : 0,255,255,249 * =============================================================================
*/ staticinline __m128i __lsx_vclip255_w(__m128i _in) {
__m128i out;
out = __lsx_vmaxi_w(_in, 0);
out = __lsx_vsat_wu(out, 7); return out;
}
#ifdef __loongarch_asx
#include <lasxintrin.h>
/*
 * =============================================================================
 * Description : Dot product of byte vector elements
 * Arguments   : Inputs - in_h, in_l
 *               Output - out
 *               Return Type - signed halfword
 * Details     : Unsigned byte elements from in_h are multiplied with
 *               unsigned byte elements from in_l producing a result
 *               twice the size of input i.e. halfword.
 *               Then these multiplied results of adjacent odd-even elements
 *               are added to the out vector
 * Example     : See out = __lasx_xvdp2_w_h(in_h, in_l)
 * =============================================================================
 */
static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) {
  __m256i out;

  /* Start from the even-indexed products, then add the odd-indexed ones. */
  out = __lasx_xvmulwev_h_bu(in_h, in_l);
  out = __lasx_xvmaddwod_h_bu(out, in_h, in_l);
  return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed halfword * Details : Signed byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplication results of adjacent odd-even elements * are added to the out vector * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_h_b(in_h, in_l);
out = __lasx_xvmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Then these multiplied results of adjacent odd-even elements * are added to the out vector. * Example : out = __lasx_xvdp2_w_h(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of word vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed double * Details : Signed word elements from in_h are multiplied with * signed word elements from in_l producing a result * twice the size of input i.e. signed double-word. * Then these multiplied results of adjacent odd-even elements * are added to the out vector. * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_d_w(in_h, in_l);
out = __lasx_xvmaddwod_d_w(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. unsigned word. * Multiplication result of adjacent odd-even elements * are added to the out vector * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_hu_h(in_h, in_l);
out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_b(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied with * unsigned byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - per RTYPE * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector. * Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * in_c : 1,2,3,4, 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8, * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1, * out : 23,40,41,26, 23,40,41,26 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_h(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * unsigned halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_hu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Vector Unsigned Dot Product and Subtract * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed halfword * Details : Unsigned byte elements from in_h are multiplied with * unsigned byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Multiplication result of adjacent odd-even elements * are added together and subtracted from double width elements * in_c vector. * Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_h_bu(in_h, in_l);
out = __lasx_xvmaddwod_h_bu(out, in_h, in_l);
out = __lasx_xvsub_h(in_c, out); return out;
}
/* * ============================================================================= * Description : Vector Signed Dot Product and Subtract * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * Signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added together and subtracted from double width elements * in_c vector. * Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) * in_c : 0,0,0,0, 0,0,0,0 * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1 * in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1 * out : -7,-3,0,0, 0,-1,0,-1 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l);
out = __lasx_xvsub_w(in_c, out); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * four times the size of input i.e. signed doubleword. * Then these multiplication results of four adjacent elements * are added together and stored to the out vector. * Example : out = __lasx_xvdp4_d_h(in_h, in_l) * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1 * in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1 * out : -2,0,1,1 * =============================================================================
*/ staticinline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l);
out = __lasx_xvhaddw_d_w(out, out); return out;
}
/* * ============================================================================= * Description : The high half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * higher half of the two-fold sign extension (signed byte * to signed halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwh_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvh_b(in_h, in_l);
out = __lasx_xvhaddw_h_b(out, out); return out;
}
/* * ============================================================================= * Description : The high half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * higher half of the two-fold sign extension (signed halfword * to signed word) and stored to the out vector. * Example : out = __lasx_xvaddwh_w_h(in_h, in_l) * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 * out : 1,0,0,-1, 1,0,0, 2 * =============================================================================
*/ staticinline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvh_h(in_h, in_l);
out = __lasx_xvhaddw_w_h(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * lower half of the two-fold sign extension (signed byte * to signed halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_b(in_h, in_l);
out = __lasx_xvhaddw_h_b(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * lower half of the two-fold sign extension (signed halfword * to signed word) and stored to the out vector. * Example : out = __lasx_xvaddwl_w_h(in_h, in_l) * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 * out : 5,-1,4,2, 1,0,2,-1 * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_h(in_h, in_l);
out = __lasx_xvhaddw_w_h(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The out vector and the out vector are added after the * lower half of the two-fold zero extension (unsigned byte * to unsigned halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_b(in_h, in_l);
out = __lasx_xvhaddw_hu_bu(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_l vector after double zero extension (unsigned byte to * signed halfword),added to the in_h vector. * Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvsllwil_hu_bu(in_l, 0);
out = __lasx_xvadd_h(in_h, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_l vector after double sign extension (signed halfword to * signed word), added to the in_h vector. * Example : out = __lasx_xvaddw_w_w_h(in_h, in_l) * in_h : 0, 1,0,0, -1,0,0,1, * in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1, * out : 2, 0,1,2, -1,0,1,1, * =============================================================================
*/ staticinline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvsllwil_w_h(in_l, 0);
out = __lasx_xvadd_w(in_h, out); return out;
}
/*
 * --> --------------------
 * --> maximum size reached
 * --> --------------------
 *
 * NOTE: This copy of the header is truncated at this point by the source
 * viewer it was taken from; the remainder of the file is not included.
 *
 * Viewer footer (translated from German):
 *   Measurement V0.5
 *   Processing time: 0.55 seconds (preprocessed)
 *   The information on this web page was compiled carefully and to the best
 *   of our knowledge. However, neither completeness, nor correctness, nor
 *   quality of the information provided is guaranteed.
 *   Remark: the colored syntax highlighting and the measurement are still
 *   experimental.
 */