/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/
/* Description : Store 4 words with stride
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : Store word from 'in0' to (pdst)
                 Store word from 'in1' to (pdst + stride)
                 Store word from 'in2' to (pdst + 2 * stride)
                 Store word from 'in3' to (pdst + 3 * stride)
                 'stride' is parenthesized so that an expression argument
                 (e.g. 'a + b') is scaled as a whole.
*/
#define SW4(in0, in1, in2, in3, pdst, stride) \
  { \
    SW(in0, (pdst)); \
    SW(in1, (pdst) + (stride)); \
    SW(in2, (pdst) + 2 * (stride)); \
    SW(in3, (pdst) + 3 * (stride)); \
  }
/* Description : Store 4 double words with stride
   Arguments   : Inputs - in0, in1, in2, in3, pdst, stride
   Details     : Store double word from 'in0' to (pdst)
                 Store double word from 'in1' to (pdst + stride)
                 Store double word from 'in2' to (pdst + 2 * stride)
                 Store double word from 'in3' to (pdst + 3 * stride)
                 'stride' is parenthesized so that an expression argument
                 (e.g. 'a + b') is scaled as a whole.
*/
#define SD4(in0, in1, in2, in3, pdst, stride) \
  { \
    SD(in0, (pdst)); \
    SD(in1, (pdst) + (stride)); \
    SD(in2, (pdst) + 2 * (stride)); \
    SD(in3, (pdst) + 3 * (stride)); \
  }
/* Description : Load vector elements with stride
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Load one RTYPE vector (16 byte elements for the wrappers
                 below) in 'out0' from (psrc)
                 Load one RTYPE vector in 'out1' from (psrc + stride)
                 'stride' is parenthesized so an expression argument is
                 added to 'psrc' as a whole.
*/
#define LD_V2(RTYPE, psrc, stride, out0, out1) \
  { \
    out0 = LD_V(RTYPE, (psrc)); \
    out1 = LD_V(RTYPE, (psrc) + (stride)); \
  }
#define LD_UB2(...) LD_V2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_V2(v16i8, __VA_ARGS__)
#define LD_SH2(...) LD_V2(v8i16, __VA_ARGS__)
#define LD_SW2(...) LD_V2(v4i32, __VA_ARGS__)
/* Description : Store 2x4 byte block to destination memory from input vector
   Arguments   : Inputs - in, stidx, pdst, stride
   Details     : Index 'stidx' halfword element from 'in' vector is copied to
                 the GP register and stored to (pdst)
                 Index 'stidx+1' halfword element from 'in' vector is copied to
                 the GP register and stored to (pdst + stride)
                 Index 'stidx+2' halfword element from 'in' vector is copied to
                 the GP register and stored to (pdst + 2 * stride)
                 Index 'stidx+3' halfword element from 'in' vector is copied to
                 the GP register and stored to (pdst + 3 * stride)
                 All arguments are parenthesized before being cast or scaled.
*/
#define ST2x4_UB(in, stidx, pdst, stride) \
  { \
    uint16_t out0_m, out1_m, out2_m, out3_m; \
    uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \
    \
    out0_m = __msa_copy_u_h((v8i16)(in), (stidx)); \
    out1_m = __msa_copy_u_h((v8i16)(in), ((stidx) + 1)); \
    out2_m = __msa_copy_u_h((v8i16)(in), ((stidx) + 2)); \
    out3_m = __msa_copy_u_h((v8i16)(in), ((stidx) + 3)); \
    \
    SH(out0_m, pblk_2x4_m); \
    SH(out1_m, pblk_2x4_m + (stride)); \
    SH(out2_m, pblk_2x4_m + 2 * (stride)); \
    SH(out3_m, pblk_2x4_m + 3 * (stride)); \
  }
/* Description : Store 4x2 byte block to destination memory from input vector
   Arguments   : Inputs - in, pdst, stride
   Details     : Index 0 word element from 'in' vector is copied to the GP
                 register and stored to (pdst)
                 Index 1 word element from 'in' vector is copied to the GP
                 register and stored to (pdst + stride)
*/
#define ST4x2_UB(in, pdst, stride) \
  { \
    uint32_t out0_m, out1_m; \
    uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \
    \
    out0_m = __msa_copy_u_w((v4i32)(in), 0); \
    out1_m = __msa_copy_u_w((v4i32)(in), 1); \
    \
    SW(out0_m, pblk_4x2_m); \
    SW(out1_m, pblk_4x2_m + (stride)); \
  }
/* Description : Store 4x4 byte block to destination memory from input vector
   Arguments   : Inputs - in0, in1, idx0, idx1, idx2, idx3, pdst, stride
   Details     : 'idx0' word element from input vector 'in0' is copied to the
                 GP register and stored to (pdst)
                 'idx1' word element from input vector 'in0' is copied to the
                 GP register and stored to (pdst + stride)
                 'idx2' word element from input vector 'in1' is copied to the
                 GP register and stored to (pdst + 2 * stride)
                 'idx3' word element from input vector 'in1' is copied to the
                 GP register and stored to (pdst + 3 * stride)
*/
#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
  { \
    uint32_t out0_m, out1_m, out2_m, out3_m; \
    uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \
    \
    out0_m = __msa_copy_u_w((v4i32)(in0), (idx0)); \
    out1_m = __msa_copy_u_w((v4i32)(in0), (idx1)); \
    out2_m = __msa_copy_u_w((v4i32)(in1), (idx2)); \
    out3_m = __msa_copy_u_w((v4i32)(in1), (idx3)); \
    \
    SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, (stride)); \
  }

/* Description : Store 4x8 byte block to destination memory from input vectors
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Word elements 0..3 of 'in0' are stored to the first four rows
                 starting at (pdst); word elements 0..3 of 'in1' are stored to
                 the next four rows starting at (pdst + 4 * stride).
                 Implemented as two ST4x4_UB stores.
*/
#define ST4x8_UB(in0, in1, pdst, stride) \
  { \
    uint8_t *pblk_4x8 = (uint8_t *)(pdst); \
    \
    ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, (stride)); \
    ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * (stride), (stride)); \
  }
/* Description : Store 8x1 byte block to destination memory from input vector
   Arguments   : Inputs - in, pdst
   Details     : Index 0 double word element from 'in' vector is copied to the
                 GP register and stored to (pdst)
*/
#define ST8x1_UB(in, pdst) \
  { \
    uint64_t out0_m; \
    \
    out0_m = __msa_copy_u_d((v2i64)(in), 0); \
    SD(out0_m, (pdst)); \
  }
/* Description : Store 8x2 byte block to destination memory from input vector
   Arguments   : Inputs - in, pdst, stride
   Details     : Index 0 double word element from 'in' vector is copied to the
                 GP register and stored to (pdst)
                 Index 1 double word element from 'in' vector is copied to the
                 GP register and stored to (pdst + stride)
*/
#define ST8x2_UB(in, pdst, stride) \
  { \
    uint64_t out0_m, out1_m; \
    uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \
    \
    out0_m = __msa_copy_u_d((v2i64)(in), 0); \
    out1_m = __msa_copy_u_d((v2i64)(in), 1); \
    \
    SD(out0_m, pblk_8x2_m); \
    SD(out1_m, pblk_8x2_m + (stride)); \
  }
/* Description : Store 8x4 byte block to destination memory from input vectors
   Arguments   : Inputs - in0, in1, pdst, stride
   Details     : Index 0 double word element from 'in0' vector is copied to the
                 GP register and stored to (pdst)
                 Index 1 double word element from 'in0' vector is copied to the
                 GP register and stored to (pdst + stride)
                 Index 0 double word element from 'in1' vector is copied to the
                 GP register and stored to (pdst + 2 * stride)
                 Index 1 double word element from 'in1' vector is copied to the
                 GP register and stored to (pdst + 3 * stride)
*/
#define ST8x4_UB(in0, in1, pdst, stride) \
  { \
    uint64_t out0_m, out1_m, out2_m, out3_m; \
    uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \
    \
    out0_m = __msa_copy_u_d((v2i64)(in0), 0); \
    out1_m = __msa_copy_u_d((v2i64)(in0), 1); \
    out2_m = __msa_copy_u_d((v2i64)(in1), 0); \
    out3_m = __msa_copy_u_d((v2i64)(in1), 1); \
    \
    SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, (stride)); \
  }
/* Description : Average with rounding (in0 + in1 + 1) / 2.
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Each unsigned byte element from 'in0' vector is added with
                 each unsigned byte element from 'in1' vector. Then the average
                 with rounding is calculated and written to 'out0'.
                 'out1' is produced the same way from 'in2' and 'in3'.
*/
#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_aver_u_b((v16u8)(in0), (v16u8)(in1)); \
    out1 = (RTYPE)__msa_aver_u_b((v16u8)(in2), (v16u8)(in3)); \
  }
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
/* Description : Immediate number of elements to slide with zero
   Arguments   : Inputs  - in0, in1, slide_val
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'zero_m' vector are slid into 'in0' by
                 value specified in the 'slide_val'; the result is written to
                 'out0'. 'out1' is produced the same way from 'in1'.
*/
#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) \
  { \
    v16i8 zero_m = { 0 }; \
    out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)(in0), (slide_val)); \
    out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)(in1), (slide_val)); \
  }
#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
/* Description : Dot product of byte vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Unsigned byte elements from 'mult0' are multiplied with
                 unsigned byte elements from 'cnst0' producing a result
                 twice the size of input i.e. unsigned halfword.
                 The multiplication result of adjacent odd-even elements
                 are added together and written to the 'out0' vector.
                 'out1' is produced the same way from 'mult1' and 'cnst1'.
*/
#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dotp_u_h((v16u8)(mult0), (v16u8)(cnst0)); \
    out1 = (RTYPE)__msa_dotp_u_h((v16u8)(mult1), (v16u8)(cnst1)); \
  }
#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
/* Description : Dot product of byte vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Signed byte elements from 'mult0' are multiplied with
                 signed byte elements from 'cnst0' producing a result
                 twice the size of input i.e. signed halfword.
                 The multiplication result of adjacent odd-even elements
                 are added together and written to the 'out0' vector.
                 'out1' is produced the same way from 'mult1' and 'cnst1'.
*/
#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dotp_s_h((v16i8)(mult0), (v16i8)(cnst0)); \
    out1 = (RTYPE)__msa_dotp_s_h((v16i8)(mult1), (v16i8)(cnst1)); \
  }
#define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__)
/* Description : Dot product of halfword vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Signed halfword elements from 'mult0' are multiplied with
                 signed halfword elements from 'cnst0' producing a result
                 twice the size of input i.e. signed word.
                 The multiplication result of adjacent odd-even elements
                 are added together and written to the 'out0' vector.
                 'out1' is produced the same way from 'mult1' and 'cnst1'.
*/
#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dotp_s_w((v8i16)(mult0), (v8i16)(cnst0)); \
    out1 = (RTYPE)__msa_dotp_s_w((v8i16)(mult1), (v8i16)(cnst1)); \
  }
#define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__)
/* Description : Dot product of word vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Signed word elements from 'mult0' are multiplied with
                 signed word elements from 'cnst0' producing a result
                 twice the size of input i.e. signed double word.
                 The multiplication result of adjacent odd-even elements
                 are added together and written to the 'out0' vector.
                 'out1' is produced the same way from 'mult1' and 'cnst1'.
*/
#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dotp_s_d((v4i32)(mult0), (v4i32)(cnst0)); \
    out1 = (RTYPE)__msa_dotp_s_d((v4i32)(mult1), (v4i32)(cnst1)); \
  }
#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
/* Description : Dot product & addition of byte vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1 (also read as accumulators)
                 Return Type - as per RTYPE
   Details     : Signed byte elements from 'mult0' are multiplied with
                 signed byte elements from 'cnst0' producing a result
                 twice the size of input i.e. signed halfword.
                 The multiplication result of adjacent odd-even elements
                 are added to the 'out0' vector.
                 'out1' is accumulated the same way from 'mult1' and 'cnst1'.
*/
#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dpadd_s_h((v8i16)(out0), (v16i8)(mult0), \
                                  (v16i8)(cnst0)); \
    out1 = (RTYPE)__msa_dpadd_s_h((v8i16)(out1), (v16i8)(mult1), \
                                  (v16i8)(cnst1)); \
  }
#define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__)
/* Description : Dot product & addition of halfword vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1 (also read as accumulators)
                 Return Type - as per RTYPE
   Details     : Signed halfword elements from 'mult0' are multiplied with
                 signed halfword elements from 'cnst0' producing a result
                 twice the size of input i.e. signed word.
                 The multiplication result of adjacent odd-even elements
                 are added to the 'out0' vector.
                 'out1' is accumulated the same way from 'mult1' and 'cnst1'.
*/
#define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dpadd_s_w((v4i32)(out0), (v8i16)(mult0), \
                                  (v8i16)(cnst0)); \
    out1 = (RTYPE)__msa_dpadd_s_w((v4i32)(out1), (v8i16)(mult1), \
                                  (v8i16)(cnst1)); \
  }
#define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__)
/* Description : Dot product of word vector elements with themselves,
                 accumulated into double word vector elements
   Arguments   : Inputs  - mult0, mult1
                 Outputs - out0, out1 (also read as accumulators)
                 Return Type - as per RTYPE
   Details     : Each signed word element from 'mult0' is multiplied with
                 itself producing an intermediate result twice the size of
                 input i.e. signed double word.
                 The multiplication result of adjacent odd-even elements
                 are added to the 'out0' vector.
                 'out1' is accumulated the same way from 'mult1'.
*/
#define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_dpadd_s_d((v2i64)(out0), (v4i32)(mult0), \
                                  (v4i32)(mult0)); \
    out1 = (RTYPE)__msa_dpadd_s_d((v2i64)(out1), (v4i32)(mult1), \
                                  (v4i32)(mult1)); \
  }
#define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__)
/* Description : Minimum values between unsigned elements of either vector are
                 copied to the output vector
   Arguments   : Inputs  - in0, in1, min_vec
                 Outputs - in place operation
                 Return Type - as per RTYPE
   Details     : Minimum of unsigned halfword element values from 'in0' and
                 'min_vec' are written to output vector 'in0'.
                 'in1' is updated the same way against 'min_vec'.
*/
#define MIN_UH2(RTYPE, in0, in1, min_vec) \
  { \
    in0 = (RTYPE)__msa_min_u_h((v8u16)(in0), (min_vec)); \
    in1 = (RTYPE)__msa_min_u_h((v8u16)(in1), (min_vec)); \
  }
#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
/* Description : Horizontal addition of 4 signed word elements of input vector
   Arguments   : Input  - in       (signed word vector)
                 Output - sum_m    (i32 sum)
                 Return Type - signed word (GP)
   Details     : 4 signed word elements of 'in' vector are added together and
                 the resulting integer sum is returned.
                 'in' is evaluated exactly once (captured in a local).
*/
#define HADD_SW_S32(in) \
  ({ \
    v4i32 hadd_sw_s32_in_m = (v4i32)(in); \
    v2i64 hadd_sw_s32_res0_m, hadd_sw_s32_res1_m; \
    int32_t hadd_sw_s32_sum_m; \
    \
    hadd_sw_s32_res0_m = __msa_hadd_s_d(hadd_sw_s32_in_m, hadd_sw_s32_in_m); \
    hadd_sw_s32_res1_m = __msa_splati_d(hadd_sw_s32_res0_m, 1); \
    hadd_sw_s32_res0_m = hadd_sw_s32_res0_m + hadd_sw_s32_res1_m; \
    hadd_sw_s32_sum_m = __msa_copy_s_w((v4i32)hadd_sw_s32_res0_m, 0); \
    hadd_sw_s32_sum_m; \
  })
/* Description : Horizontal addition of 4 unsigned word elements
   Arguments   : Input  - in       (unsigned word vector)
                 Output - sum_m    (u32 sum)
                 Return Type - unsigned word (GP)
   Details     : 4 unsigned word elements of 'in' vector are added together and
                 the resulting integer sum is returned.
                 'in' is evaluated exactly once (captured in a local).
*/
#define HADD_UW_U32(in) \
  ({ \
    v4u32 hadd_uw_u32_in_m = (v4u32)(in); \
    v2u64 hadd_uw_u32_res0_m, hadd_uw_u32_res1_m; \
    uint32_t hadd_uw_u32_sum_m; \
    \
    hadd_uw_u32_res0_m = __msa_hadd_u_d(hadd_uw_u32_in_m, hadd_uw_u32_in_m); \
    hadd_uw_u32_res1_m = (v2u64)__msa_splati_d((v2i64)hadd_uw_u32_res0_m, 1); \
    hadd_uw_u32_res0_m += hadd_uw_u32_res1_m; \
    hadd_uw_u32_sum_m = __msa_copy_u_w((v4i32)hadd_uw_u32_res0_m, 0); \
    hadd_uw_u32_sum_m; \
  })
/* Description : Horizontal addition of 8 unsigned halfword elements
   Arguments   : Input  - in       (unsigned halfword vector)
                 Output - sum_m    (u32 sum)
                 Return Type - unsigned word
   Details     : 8 unsigned halfword elements of 'in' vector are added
                 together and the resulting integer sum is returned.
                 Halfword pairs are first widened to words, then reduced with
                 HADD_UW_U32. 'in' is evaluated exactly once.
*/
#define HADD_UH_U32(in) \
  ({ \
    v8u16 hadd_uh_u32_in_m = (v8u16)(in); \
    v4u32 hadd_uh_u32_res_m; \
    uint32_t hadd_uh_u32_sum_m; \
    \
    hadd_uh_u32_res_m = __msa_hadd_u_w(hadd_uh_u32_in_m, hadd_uh_u32_in_m); \
    hadd_uh_u32_sum_m = HADD_UW_U32(hadd_uh_u32_res_m); \
    hadd_uh_u32_sum_m; \
  })
/* Description : Horizontal addition of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Each unsigned odd byte element from 'in0' is added to
                 even unsigned byte element from 'in0' (pairwise) and the
                 halfword result is written to 'out0'.
                 'out1' is produced the same way from 'in1'.
*/
#define HADD_UB2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_hadd_u_h((v16u8)(in0), (v16u8)(in0)); \
    out1 = (RTYPE)__msa_hadd_u_h((v16u8)(in1), (v16u8)(in1)); \
  }
#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
/* Description : Horizontal subtraction of unsigned byte vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Adjacent odd/even unsigned byte elements of 'in0' are
                 subtracted pairwise and the halfword result is written to
                 'out0'. 'out1' is produced the same way from 'in1'.
                 NOTE(review): the earlier comment said the odd element is
                 subtracted from the even one; confirm the operand order
                 against the MSA HSUB_U.H definition.
*/
#define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_hsub_u_h((v16u8)(in0), (v16u8)(in0)); \
    out1 = (RTYPE)__msa_hsub_u_h((v16u8)(in1), (v16u8)(in1)); \
  }
#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
/* Description : SAD (Sum of Absolute Difference)
   Arguments   : Inputs  - in0, in1, ref0, ref1
                 Outputs - sad_m                 (halfword vector)
                 Return Type - unsigned halfword vector (v8u16)
   Details     : Absolute difference of all the byte elements from 'in0' with
                 'ref0' is calculated and preserved in 'diff0_m'; likewise
                 'in1' with 'ref1' in 'diff1_m'. Then even-odd pairs of each
                 difference vector are added together and both sets of 8
                 halfword results are accumulated into 'sad_m'.
*/
#define SAD_UB2_UH(in0, in1, ref0, ref1) \
  ({ \
    v16u8 diff0_m, diff1_m; \
    v8u16 sad_m = { 0 }; \
    \
    diff0_m = __msa_asub_u_b((v16u8)(in0), (v16u8)(ref0)); \
    diff1_m = __msa_asub_u_b((v16u8)(in1), (v16u8)(ref1)); \
    \
    sad_m += __msa_hadd_u_h((v16u8)diff0_m, (v16u8)diff0_m); \
    sad_m += __msa_hadd_u_h((v16u8)diff1_m, (v16u8)diff1_m); \
    \
    sad_m; \
  })
/* Description : Horizontal subtraction of signed halfword vector elements
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Adjacent odd/even signed halfword elements of 'in0' are
                 subtracted pairwise and the word result is written to
                 'out0'. 'out1' is produced the same way from 'in1'.
                 Despite the 'UH' in the name, the signed intrinsic
                 (__msa_hsub_s_w) is used.
                 NOTE(review): the earlier comment said the odd element is
                 subtracted from the even one; confirm the operand order
                 against the MSA HSUB_S.W definition.
*/
#define HSUB_UH2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_hsub_s_w((v8i16)(in0), (v8i16)(in0)); \
    out1 = (RTYPE)__msa_hsub_s_w((v8i16)(in1), (v8i16)(in1)); \
  }
#define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__)
/* Description : Set elements of a vector to GPR values
   Arguments   : Inputs - in0, in1
                 Output - out (also read; other elements are preserved)
                 Return Type - as per RTYPE
   Details     : Set word element 0 in vector 'out' to value specified in 'in0'
                 Set word element 1 in vector 'out' to value specified in 'in1'
*/
#define INSERT_W2(RTYPE, in0, in1, out) \
  { \
    out = (RTYPE)__msa_insert_w((v4i32)(out), 0, (in0)); \
    out = (RTYPE)__msa_insert_w((v4i32)(out), 1, (in1)); \
  }
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
/* Description : Interleave even byte elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even byte elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Even byte elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
                 Note the intrinsic receives the pair in swapped order
                 (in1, in0).
*/
#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvev_b((v16i8)(in1), (v16i8)(in0)); \
    out1 = (RTYPE)__msa_ilvev_b((v16i8)(in3), (v16i8)(in2)); \
  }
#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__)
/* Description : Interleave even halfword elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even halfword elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Even halfword elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
                 Note the intrinsic receives the pair in swapped order
                 (in1, in0).
*/
#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvev_h((v8i16)(in1), (v8i16)(in0)); \
    out1 = (RTYPE)__msa_ilvev_h((v8i16)(in3), (v8i16)(in2)); \
  }
#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)
#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)
/* Description : Interleave even word elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even word elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Even word elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
                 Note the intrinsic receives the pair in swapped order
                 (in1, in0).
*/
#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvev_w((v4i32)(in1), (v4i32)(in0)); \
    out1 = (RTYPE)__msa_ilvev_w((v4i32)(in3), (v4i32)(in2)); \
  }
#define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__)
/* Description : Interleave even double word elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even double word elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Even double word elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
                 Note the intrinsic receives the pair in swapped order
                 (in1, in0).
*/
#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvev_d((v2i64)(in1), (v2i64)(in0)); \
    out1 = (RTYPE)__msa_ilvev_d((v2i64)(in3), (v2i64)(in2)); \
  }
#define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)
/* Description : Interleave left half of byte elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Left half of byte elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Left half of byte elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
*/
#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)(in2), (v16i8)(in3)); \
  }
#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__)
#define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__)
#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__)
#define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__)
/* Description : Interleave left half of halfword elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Left half of halfword elements of 'in0' and 'in1' are
                 interleaved and written to 'out0'.
                 Left half of halfword elements of 'in2' and 'in3' are
                 interleaved and written to 'out1'.
*/
#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvl_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_ilvl_h((v8i16)(in2), (v8i16)(in3)); \
  }
#define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__)
#define ILVL_H2_SW(...) ILVL_H2(v4i32, __VA_ARGS__)
/* Description : Interleave left half of word elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Left half of word elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Left half of word elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
*/
#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvl_w((v4i32)(in0), (v4i32)(in1)); \
    out1 = (RTYPE)__msa_ilvl_w((v4i32)(in2), (v4i32)(in3)); \
  }
#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__)
#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
/* Description : Interleave right half of byte elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements of 'in0' and 'in1' are interleaved
                 and written to 'out0'.
                 Right half of byte elements of 'in2' and 'in3' are interleaved
                 and written to 'out1'.
*/
#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvr_b((v16i8)(in2), (v16i8)(in3)); \
  }
#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__)
#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
/* Description : Interleave right half of halfword elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of halfword elements of 'in0' and 'in1' are
                 interleaved and written to 'out0'.
                 Right half of halfword elements of 'in2' and 'in3' are
                 interleaved and written to 'out1'.
*/
#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_ilvr_h((v8i16)(in2), (v8i16)(in3)); \
  }
#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__)
#define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__)
/* Description : Interleave right half of double word elements from vectors
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of double word elements of 'in0' and 'in1' are
                 interleaved and written to 'out0'.
                 Right half of double word elements of 'in2' and 'in3' are
                 interleaved and written to 'out1'.
*/
#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \
    out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \
  }
#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
/* Description : Interleave both left and right half of input vectors
   Arguments   : Inputs  - in0, in1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Right half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out0'.
                 Left half of byte elements from 'in0' and 'in1' are
                 interleaved and written to 'out1'.
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_ilvr_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_ilvl_b((v16i8)(in0), (v16i8)(in1)); \
  }
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
/* Description : Saturate the halfword element values to the max
                 unsigned value of (sat_val + 1) bits
                 The element data width remains unchanged
   Arguments   : Inputs  - in0, in1, sat_val
                 Outputs - in place operation
                 Return Type - as per RTYPE
   Details     : Each unsigned halfword element from 'in0' is saturated to the
                 value generated with (sat_val + 1) bit range.
                 'in1' is saturated the same way.
                 The results are written in place
*/
#define SAT_UH2(RTYPE, in0, in1, sat_val) \
  { \
    in0 = (RTYPE)__msa_sat_u_h((v8u16)(in0), (sat_val)); \
    in1 = (RTYPE)__msa_sat_u_h((v8u16)(in1), (sat_val)); \
  }
#define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__)
/* Description : Saturate the signed halfword element values to the
                 (sat_val + 1) bit signed range
                 The element data width remains unchanged
   Arguments   : Inputs  - in0, in1, sat_val
                 Outputs - in place operation
                 Return Type - as per RTYPE
   Details     : Each signed halfword element from 'in0' is saturated to the
                 value generated with (sat_val + 1) bit range
                 (signed intrinsic __msa_sat_s_h).
                 'in1' is saturated the same way.
                 The results are written in place
*/
#define SAT_SH2(RTYPE, in0, in1, sat_val) \
  { \
    in0 = (RTYPE)__msa_sat_s_h((v8i16)(in0), (sat_val)); \
    in1 = (RTYPE)__msa_sat_s_h((v8i16)(in1), (sat_val)); \
  }
#define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__)
/* Description : Indexed halfword element values are replicated to all
                 elements in output vector
   Arguments   : Inputs  - in, idx0, idx1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : 'idx0' element value from 'in' vector is replicated to all
                 elements in 'out0' vector.
                 'idx1' element value from 'in' vector is replicated to all
                 elements in 'out1' vector.
                 Valid index range for halfword operation is 0-7
*/
#define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \
  { \
    out0 = (RTYPE)__msa_splati_h((v8i16)(in), (idx0)); \
    out1 = (RTYPE)__msa_splati_h((v8i16)(in), (idx1)); \
  }
#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
/* Description : Pack even byte elements of vector pairs
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even byte elements of 'in0' are copied to the left half of
                 'out0' & even byte elements of 'in1' are copied to the right
                 half of 'out0'.
                 'out1' is packed the same way from 'in2' and 'in3'.
*/
#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_pckev_b((v16i8)(in0), (v16i8)(in1)); \
    out1 = (RTYPE)__msa_pckev_b((v16i8)(in2), (v16i8)(in3)); \
  }
#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
/* Description : Pack even halfword elements of vector pairs
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even halfword elements of 'in0' are copied to the left half of
                 'out0' & even halfword elements of 'in1' are copied to the
                 right half of 'out0'.
                 'out1' is packed the same way from 'in2' and 'in3'.
*/
#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_pckev_h((v8i16)(in0), (v8i16)(in1)); \
    out1 = (RTYPE)__msa_pckev_h((v8i16)(in2), (v8i16)(in3)); \
  }
#define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__)
#define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__)
/* Description : Pack even double word elements of vector pairs
   Arguments   : Inputs  - in0, in1, in2, in3
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Even double word elements of 'in0' are copied to the left
                 half of 'out0' & even double word elements of 'in1' are
                 copied to the right half of 'out0'.
                 'out1' is packed the same way from 'in2' and 'in3'.
*/
#define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
  { \
    out0 = (RTYPE)__msa_pckev_d((v2i64)(in0), (v2i64)(in1)); \
    out1 = (RTYPE)__msa_pckev_d((v2i64)(in2), (v2i64)(in3)); \
  }
#define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__)
#define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__)
/* Description : Each byte element is logically xor'ed with immediate 128
   Arguments   : Inputs  - in0, in1
                 Outputs - in place operation
                 Return Type - as per RTYPE
   Details     : Each unsigned byte element from input vector 'in0' is
                 logically xor'ed with 128 and the result is stored in-place.
                 'in1' is updated the same way.
*/
#define XORI_B2_128(RTYPE, in0, in1) \
  { \
    in0 = (RTYPE)__msa_xori_b((v16u8)(in0), 128); \
    in1 = (RTYPE)__msa_xori_b((v16u8)(in1), 128); \
  }
/* --------------------
 * (Web-page residue, not part of the original header.)
 * The information on this web page has been carefully compiled to the best
 * of our knowledge. However, neither completeness, nor correctness, nor
 * quality of the information provided is guaranteed.
 * Note:
 * The colored syntax highlighting and the measurement are still experimental.
 */