Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/Firefox/third_party/simde/simde/x86/   (Browser von der Mozilla Stiftung Version 136.0.1©)  Datei vom 10.2.2025 mit Größe 260 kB image not shown  

Quelle  sse2.h   Sprache: C

 
/* SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Copyright:
 *   2017-2020 Evan Nemerson <evan@nemerson.com>
 *   2015-2017 John W. Ratcliff <jratcliffscarab@gmail.com>
 *   2015      Brandon Rowlett <browlett@nvidia.com>
 *   2015      Ken Fast <kfast@gdeb.com>
 *   2017      Hasindu Gamaarachchi <hasindu@unsw.edu.au>
 *   2018      Jeff Daily <jeff.daily@amd.com>
 */


#if !defined(SIMDE_X86_SSE2_H)
#define SIMDE_X86_SSE2_H

#include "sse.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

typedef union {
  #if defined(SIMDE_VECTOR_SUBSCRIPT)
    SIMDE_ALIGN_TO_16 int8_t          i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int16_t        i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int32_t        i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int64_t        i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint8_t         u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint16_t       u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint32_t       u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint64_t       u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    #if defined(SIMDE_HAVE_INT128_)
    SIMDE_ALIGN_TO_16 simde_int128  i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    #endif
    SIMDE_ALIGN_TO_16 simde_float32  f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float64  f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;

    SIMDE_ALIGN_TO_16 int_fast32_t  i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  #else
    SIMDE_ALIGN_TO_16 int8_t         i8[16];
    SIMDE_ALIGN_TO_16 int16_t        i16[8];
    SIMDE_ALIGN_TO_16 int32_t        i32[4];
    SIMDE_ALIGN_TO_16 int64_t        i64[2];
    SIMDE_ALIGN_TO_16 uint8_t        u8[16];
    SIMDE_ALIGN_TO_16 uint16_t       u16[8];
    SIMDE_ALIGN_TO_16 uint32_t       u32[4];
    SIMDE_ALIGN_TO_16 uint64_t       u64[2];
    #if defined(SIMDE_HAVE_INT128_)
    SIMDE_ALIGN_TO_16 simde_int128  i128[1];
    SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
    #endif
    SIMDE_ALIGN_TO_16 simde_float32  f32[4];
    SIMDE_ALIGN_TO_16 simde_float64  f64[2];

    SIMDE_ALIGN_TO_16 int_fast32_t  i32f[16 / sizeof(int_fast32_t)];
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
  #endif

    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
    SIMDE_ALIGN_TO_16 simde__m64         m64[2];

  #if defined(SIMDE_X86_SSE2_NATIVE)
    SIMDE_ALIGN_TO_16 __m128i        n;
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    SIMDE_ALIGN_TO_16 int8x16_t      neon_i8;
    SIMDE_ALIGN_TO_16 int16x8_t      neon_i16;
    SIMDE_ALIGN_TO_16 int32x4_t      neon_i32;
    SIMDE_ALIGN_TO_16 int64x2_t      neon_i64;
    SIMDE_ALIGN_TO_16 uint8x16_t     neon_u8;
    SIMDE_ALIGN_TO_16 uint16x8_t     neon_u16;
    SIMDE_ALIGN_TO_16 uint32x4_t     neon_u32;
    SIMDE_ALIGN_TO_16 uint64x2_t     neon_u64;
    #if defined(__ARM_FP16_FORMAT_IEEE)
    SIMDE_ALIGN_TO_16 float16x8_t    neon_f16;
    #endif
    SIMDE_ALIGN_TO_16 float32x4_t    neon_f32;
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_ALIGN_TO_16 float64x2_t    neon_f64;
    #endif
  #elif defined(SIMDE_MIPS_MSA_NATIVE)
    v16i8 msa_i8;
    v8i16 msa_i16;
    v4i32 msa_i32;
    v2i64 msa_i64;
    v16u8 msa_u8;
    v8u16 msa_u16;
    v4u32 msa_u32;
    v2u64 msa_u64;
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    SIMDE_ALIGN_TO_16 v128_t         wasm_v128;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)          altivec_i8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)         altivec_i16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__)  altivec_i32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)        altivec_u8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)       altivec_u16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32f;
    #endif
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)                altivec_f32;
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64;
    #endif
  #endif
} simde__m128i_private;

typedef union {
  #if defined(SIMDE_VECTOR_SUBSCRIPT)
    SIMDE_ALIGN_TO_16 int8_t          i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int16_t        i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int32_t        i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int64_t        i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint8_t         u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint16_t       u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint32_t       u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint64_t       u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float32  f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float64  f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int_fast32_t  i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  #else
    SIMDE_ALIGN_TO_16 int8_t         i8[16];
    SIMDE_ALIGN_TO_16 int16_t        i16[8];
    SIMDE_ALIGN_TO_16 int32_t        i32[4];
    SIMDE_ALIGN_TO_16 int64_t        i64[2];
    SIMDE_ALIGN_TO_16 uint8_t        u8[16];
    SIMDE_ALIGN_TO_16 uint16_t       u16[8];
    SIMDE_ALIGN_TO_16 uint32_t       u32[4];
    SIMDE_ALIGN_TO_16 uint64_t       u64[2];
    SIMDE_ALIGN_TO_16 simde_float32  f32[4];
    SIMDE_ALIGN_TO_16 simde_float64  f64[2];
    SIMDE_ALIGN_TO_16 int_fast32_t  i32f[16 / sizeof(int_fast32_t)];
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
  #endif

    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
    SIMDE_ALIGN_TO_16 simde__m64         m64[2];

  #if defined(SIMDE_X86_SSE2_NATIVE)
    SIMDE_ALIGN_TO_16 __m128d        n;
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    SIMDE_ALIGN_TO_16 int8x16_t      neon_i8;
    SIMDE_ALIGN_TO_16 int16x8_t      neon_i16;
    SIMDE_ALIGN_TO_16 int32x4_t      neon_i32;
    SIMDE_ALIGN_TO_16 int64x2_t      neon_i64;
    SIMDE_ALIGN_TO_16 uint8x16_t     neon_u8;
    SIMDE_ALIGN_TO_16 uint16x8_t     neon_u16;
    SIMDE_ALIGN_TO_16 uint32x4_t     neon_u32;
    SIMDE_ALIGN_TO_16 uint64x2_t     neon_u64;
    SIMDE_ALIGN_TO_16 float32x4_t    neon_f32;
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_ALIGN_TO_16 float64x2_t    neon_f64;
    #endif
  #elif defined(SIMDE_MIPS_MSA_NATIVE)
    v16i8 msa_i8;
    v8i16 msa_i16;
    v4i32 msa_i32;
    v2i64 msa_i64;
    v16u8 msa_u8;
    v8u16 msa_u16;
    v4u32 msa_u32;
    v2u64 msa_u64;
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    SIMDE_ALIGN_TO_16 v128_t         wasm_v128;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char)          altivec_i8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short)         altivec_i16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32;
    #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__)  altivec_i32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int)           altivec_i32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char)        altivec_u8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short)       altivec_u16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
    #else
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int)         altivec_u32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float)                altivec_f32;
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long)   altivec_i64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
      SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double)             altivec_f64;
    #endif
  #endif
} simde__m128d_private;

#if defined(SIMDE_X86_SSE2_NATIVE)
  typedef __m128i simde__m128i;
  typedef __m128d simde__m128d;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
   typedef int64x2_t simde__m128i;
#  if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
     typedef float64x2_t simde__m128d;
#  elif defined(SIMDE_VECTOR_SUBSCRIPT)
     typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#  else
     typedef simde__m128d_private simde__m128d;
#  endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
   typedef v128_t simde__m128i;
   typedef v128_t simde__m128d;
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i;
  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
     typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d;
  #else
     typedef simde__m128d_private simde__m128d;
  #endif
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
  typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#else
  typedef simde__m128i_private simde__m128i;
  typedef simde__m128d_private simde__m128d;
#endif

#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  typedef simde__m128i __m128i;
  typedef simde__m128d __m128d;
#endif

HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect");
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned");
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde__m128i_from_private(simde__m128i_private v) {
  simde__m128i r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i_private
simde__m128i_to_private(simde__m128i v) {
  simde__m128i_private r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde__m128d_from_private(simde__m128d_private v) {
  simde__m128d r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d_private
simde__m128d_to_private(simde__m128d v) {
  simde__m128d_private r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32)
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64)
  #endif
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
  #endif
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32)
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64)
  #endif
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
    #if defined(SIMDE_BUG_GCC_95782)
      SIMDE_FUNCTION_ATTRIBUTES
      SIMDE_POWER_ALTIVEC_VECTOR(double)
      simde__m128d_to_altivec_f64(simde__m128d value) {
        simde__m128d_private r_ = simde__m128d_to_private(value);
        return r_.altivec_f64;
      }

      SIMDE_FUNCTION_ATTRIBUTES
      simde__m128d
      simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) {
        simde__m128d_private r_;
        r_.altivec_f64 = value;
        return simde__m128d_from_private(r_);
      }
    #else
      SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64)
    #endif
  #endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128);
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128);
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_set_pd (simde_float64 e1, simde_float64 e0) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set_pd(e1, e0);
  #else
    simde__m128d_private r_;

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_make(e0, e1);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 };
      r_.neon_f64 = vld1q_f64(data);
    #else
      r_.f64[0] = e0;
      r_.f64[1] = e1;
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_set1_pd (simde_float64 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_pd(a);
  #else
    simde__m128d_private r_;

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_splat(a);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vdupq_n_f64(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.f64[i] = a;
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#define simde_mm_set_pd1(a) simde_mm_set1_pd(a)
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set1_pd(a) simde_mm_set1_pd(a)
  #define _mm_set_pd1(a) simde_mm_set1_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_abs_pd(simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    simde_float64 mask_;
    uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
    simde_memcpy(&mask_, &u64_, sizeof(u64_));
    return _mm_and_pd(_mm_set1_pd(mask_), a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vabsq_f64(a_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_abs(a_.altivec_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_fabs(a_.f64[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_not_pd(simde__m128d a) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE)
    __m128i ai = _mm_castpd_si128(a);
    return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmvnq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) {
  /* This function is for when you want to blend two elements together
   * according to a mask.  It is similar to _mm_blendv_pd, except that
   * it is undefined whether the blend is based on the highest bit in
   * each lane (like blendv) or just bitwise operations.  This allows
   * us to implement the function efficiently everywhere.
   *
   * Basically, you promise that all the lanes in mask are either 0 or
   * ~0. */

  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_blendv_pd(a, b, mask);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b),
      mask_ = simde__m128d_to_private(mask);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_add_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 + b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] + b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_add_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = a_.i16 + b_.i16;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = a_.i16[i] + b_.i16[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_add_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = a_.i32 + b_.i32;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = a_.i32[i] + b_.i32[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_add_epi64 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_epi64(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64);
    #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE)
      r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = a_.i64 + b_.i64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] + b_.i64[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_add_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.f64 = a_.f64 + b_.f64;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = a_.f64[i] + b_.f64[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_pd(a, b) simde_mm_add_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_move_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_move_sd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      #if defined(HEDLEY_IBM_VERSION)
        r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1);
      #else
        r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1);
      #endif
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1);
    #else
      r_.f64[0] = b_.f64[0];
      r_.f64[1] = a_.f64[1];
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_move_sd(a, b) simde_mm_move_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_broadcastlow_pd(simde__m128d a) {
  /* This function broadcasts the first element in the input vector to
   * all lanes.  It is used to avoid generating spurious exceptions in
   * *_sd functions since there may be garbage in the upper lanes. */


  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_splat(a_.altivec_f64, 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]);
    #elif defined(SIMDE_SHUFFLE_VECTOR_)
      r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = a_.f64[0];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_add_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_sd(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
    return simde_mm_move_sd(a, simde_mm_add_pd(a, b));
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    r_.f64[0] = a_.f64[0] + b_.f64[0];
    r_.f64[1] = a_.f64[1];

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_sd(a, b) simde_mm_add_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m64
simde_mm_add_si64 (simde__m64 a, simde__m64 b) {
  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
    return _mm_add_si64(a, b);
  #else
    simde__m64_private
      r_,
      a_ = simde__m64_to_private(a),
      b_ = simde__m64_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64);
    #else
      r_.i64[0] = a_.i64[0] + b_.i64[0];
    #endif

    return simde__m64_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_si64(a, b) simde_mm_add_si64(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_and_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_and_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_and_pd(a, b) simde_mm_and_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_and_si128 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_and_si128(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_and_si128(a, b) simde_mm_and_si128(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_andnot_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_andnot_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = ~a_.u64[i] & b_.u64[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_andnot_si128(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_xor_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f ^ b_.i32f;
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_avg_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
      uint16_t wa SIMDE_VECTOR(32);
      uint16_t wb SIMDE_VECTOR(32);
      uint16_t wr SIMDE_VECTOR(32);
      SIMDE_CONVERT_VECTOR_(wa, a_.u8);
      SIMDE_CONVERT_VECTOR_(wb, b_.u8);
      wr = (wa + wb + 1) >> 1;
      SIMDE_CONVERT_VECTOR_(r_.u8, wr);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_avg_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
      uint32_t wa SIMDE_VECTOR(32);
      uint32_t wb SIMDE_VECTOR(32);
      uint32_t wr SIMDE_VECTOR(32);
      SIMDE_CONVERT_VECTOR_(wa, a_.u16);
      SIMDE_CONVERT_VECTOR_(wb, b_.u16);
      wr = (wa + wb + 1) >> 1;
      SIMDE_CONVERT_VECTOR_(r_.u16, wr);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1;
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_setzero_si128 (void) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_setzero_si128();
  #else
    simde__m128i_private r_;

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vdupq_n_s32(0);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT)
      r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 };
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = 0;
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_setzero_si128() (simde_mm_setzero_si128())
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_bslli_si128 (simde__m128i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a);

  if (HEDLEY_UNLIKELY((imm8 & ~15))) {
    return simde_mm_setzero_si128();
  }

  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
    r_.altivec_i8 =
      #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
        vec_slo
      #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
        vec_sro
      #endif
        (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3)));
  #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
    r_.u128[0] = a_.u128[0] << (imm8 * 8);
  #else
    r_ = simde__m128i_to_private(simde_mm_setzero_si128());
    for (int i = imm8 ; i < HEDLEY_STATIC_CAST(intsizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      r_.i8[i] = a_.i8[i - imm8];
    }
  #endif

  return simde__m128i_from_private(r_);
}
#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
  #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
  #define simde_mm_bslli_si128(a, imm8) \
  simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8)))))
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_mm_bslli_si128(a, imm8) __extension__ ({        \
    simde__m128i_from_wasm_v128(                                \
      wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)),          \
                         simde__m128i_to_wasm_v128((a)),        \
                         ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \
                         ((imm8)&0xF0) ? 0 : 31 - ((imm8)&0xF))); })
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \
    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \
    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    simde__m128i_private simde_tmp_r_; \
    if (HEDLEY_UNLIKELY(imm8 > 15)) { \
      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    } else { \
      simde_tmp_r_.i8 = \
        SIMDE_SHUFFLE_VECTOR_(8, 16, \
          simde_tmp_z_.i8, \
          (simde_tmp_a_).i8, \
          HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \
          HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \
    } \
    simde__m128i_from_private(simde_tmp_r_); }))
#endif
#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
  #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_bsrli_si128 (simde__m128i a, const int imm8)
    SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255)  {
  simde__m128i_private
    r_,
    a_ = simde__m128i_to_private(a);

  if (HEDLEY_UNLIKELY((imm8 & ~15))) {
    return simde_mm_setzero_si128();
  }

  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER)
    r_.altivec_i8 =
    #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE)
      vec_sro
    #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */
      vec_slo
    #endif
        (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8)));
  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3)));
  #else
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
      const int e = HEDLEY_STATIC_CAST(int, i) + imm8;
      r_.i8[i] = (e < 16) ? a_.i8[e] : 0;
    }
  #endif

  return simde__m128i_from_private(r_);
}
#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI)
  #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__)
  #define simde_mm_bsrli_si128(a, imm8) \
  simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? imm8 : (imm8 & 15))))
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
  #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \
    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \
    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \
    if (HEDLEY_UNLIKELY(imm8 > 15)) { \
      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    } else { \
      simde_tmp_r_.wasm_v128 = \
      wasm_i8x16_shuffle( \
        simde_tmp_z_.wasm_v128, \
        simde_tmp_a_.wasm_v128, \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \
    } \
    simde__m128i_from_private(simde_tmp_r_); }))
#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \
    const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \
    const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \
    if (HEDLEY_UNLIKELY(imm8 > 15)) { \
      simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \
    } else { \
      simde_tmp_r_.i8 = \
      SIMDE_SHUFFLE_VECTOR_(8, 16, \
        simde_tmp_z_.i8, \
        (simde_tmp_a_).i8, \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \
        HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \
    } \
    simde__m128i_from_private(simde_tmp_r_); }))
#endif
#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
  #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8))
#endif

SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_clflush (void const* p) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    _mm_clflush(p);
  #else
    (void) p;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_clflush(p) simde_mm_clflush(p)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comieq_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comieq_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] == b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comige_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comige_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] >= b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comigt_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comigt_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] > b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comile_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comile_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] <= b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comilt_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comilt_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] < b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comineq_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comineq_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] != b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) {
  simde__m128d_private
    r_,
    dest_ = simde__m128d_to_private(dest),
    src_ = simde__m128d_to_private(src);

  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0)));
    #else
      simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0);
      uint64_t u64_nz;
      simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz));
      uint64x2_t sign_pos = vdupq_n_u64(u64_nz);
    #endif
    r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS)
      r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64);
    #else
      r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64);
    #endif
  #elif defined(simde_math_copysign)
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);
    }
  #else
    simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0));
    return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest));
  #endif

  return simde__m128d_from_private(r_);
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) {
  return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest);
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_castpd_ps (simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castpd_ps(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f32_f64(a);
  #else
    simde__m128 r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castpd_ps(a) simde_mm_castpd_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_castpd_si128 (simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castpd_si128(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_s64_f64(a);
  #else
    simde__m128i r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castpd_si128(a) simde_mm_castpd_si128(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_castps_pd (simde__m128 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castps_pd(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f64_f32(a);
  #else
    simde__m128d r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castps_pd(a) simde_mm_castps_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_castps_si128 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castps_si128(a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32);
  #else
    simde__m128i r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castps_si128(a) simde_mm_castps_si128(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_castsi128_pd (simde__m128i a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castsi128_pd(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f64_s64(a);
  #else
    simde__m128d r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_castsi128_ps (simde__m128i a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castsi128_ps(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32);
  #else
    simde__m128 r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpeq_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpeq_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = (a_.i16 == b_.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b)
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpeq_epi32(a, b);
  #else
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=96 H=95 G=95

¤ Dauer der Verarbeitung: 0.38 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.