Quelle MacroAssembler-x86-shared-inl.h

Sprache: C

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef jit_x86_shared_MacroAssembler_x86_shared_inl_h
#define jit_x86_shared_MacroAssembler_x86_shared_inl_h

#include "jit/x86-shared/MacroAssembler-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

namespace js {
namespace jit {

//{{{ check_macroassembler_style
// ===============================================================
// Move instructions

void MacroAssembler::moveFloat16ToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);

  // Ensure the hi-word is zeroed.
  movzwl(dest, dest);
}

void MacroAssembler::moveGPRToFloat16(Register src, FloatRegister dest) {
  // Ensure the hi-word is zeroed.
  movzwl(src, src);

  vmovd(src, dest);
}

void MacroAssembler::moveFloat32ToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);
}

void MacroAssembler::moveGPRToFloat32(Register src, FloatRegister dest) {
  vmovd(src, dest);
}

void MacroAssembler::moveLowDoubleToGPR(FloatRegister src, Register dest) {
  vmovd(src, dest);
}

void MacroAssembler::move8ZeroExtend(Register src, Register dest) {
  movzbl(src, dest);
}

void MacroAssembler::move8SignExtend(Register src, Register dest) {
  movsbl(src, dest);
}

void MacroAssembler::move16SignExtend(Register src, Register dest) {
  movswl(src, dest);
}

void MacroAssembler::loadAbiReturnAddress(Register dest) {
  loadPtr(Address(getStackPointer(), 0), dest);
}

// ===============================================================
// Logical instructions

void MacroAssembler::not32(Register reg) { notl(reg); }

void MacroAssembler::and32(Register src, Register dest) { andl(src, dest); }

void MacroAssembler::and32(Imm32 imm, Register dest) { andl(imm, dest); }

void MacroAssembler::and32(Imm32 imm, const Address& dest) {
  andl(imm, Operand(dest));
}

void MacroAssembler::and32(const Address& src, Register dest) {
  andl(Operand(src), dest);
}

void MacroAssembler::or32(Register src, Register dest) { orl(src, dest); }

void MacroAssembler::or32(Imm32 imm, Register dest) { orl(imm, dest); }

void MacroAssembler::or32(Imm32 imm, const Address& dest) {
  orl(imm, Operand(dest));
}

void MacroAssembler::xor32(Register src, Register dest) { xorl(src, dest); }

void MacroAssembler::xor32(Imm32 imm, Register dest) { xorl(imm, dest); }

void MacroAssembler::xor32(Imm32 imm, const Address& dest) {
  xorl(imm, Operand(dest));
}

void MacroAssembler::xor32(const Address& src, Register dest) {
  xorl(Operand(src), dest);
}

void MacroAssembler::clz32(Register src, Register dest, bool knownNotZero) {
  if (AssemblerX86Shared::HasLZCNT()) {
    lzcntl(src, dest);
    return;
  }

  bsrl(src, dest);
  if (!knownNotZero) {
    // If the source is zero then bsrl leaves garbage in the destination.
    Label nonzero;
    j(Assembler::NonZero, &nonzero);
    movl(Imm32(0x3F), dest);
    bind(&nonzero);
  }
  xorl(Imm32(0x1F), dest);
}

void MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero) {
  if (AssemblerX86Shared::HasBMI1()) {
    tzcntl(src, dest);
    return;
  }

  bsfl(src, dest);
  if (!knownNotZero) {
    Label nonzero;
    j(Assembler::NonZero, &nonzero);
    movl(Imm32(32), dest);
    bind(&nonzero);
  }
}

void MacroAssembler::popcnt32(Register input, Register output, Register tmp) {
  if (AssemblerX86Shared::HasPOPCNT()) {
    popcntl(input, output);
    return;
  }

  MOZ_ASSERT(tmp != InvalidReg);

  // Equivalent to mozilla::CountPopulation32()

  movl(input, tmp);
  if (input != output) {
    movl(input, output);
  }
  shrl(Imm32(1), output);
  andl(Imm32(0x55555555), output);
  subl(output, tmp);
  movl(tmp, output);
  andl(Imm32(0x33333333), output);
  shrl(Imm32(2), tmp);
  andl(Imm32(0x33333333), tmp);
  addl(output, tmp);
  movl(tmp, output);
  shrl(Imm32(4), output);
  addl(tmp, output);
  andl(Imm32(0xF0F0F0F), output);
  imull(Imm32(0x1010101), output, output);
  shrl(Imm32(24), output);
}

// ===============================================================
// Swap instructions

void MacroAssembler::byteSwap16SignExtend(Register reg) {
  rolw(Imm32(8), reg);
  movswl(reg, reg);
}

void MacroAssembler::byteSwap16ZeroExtend(Register reg) {
  rolw(Imm32(8), reg);
  movzwl(reg, reg);
}

void MacroAssembler::byteSwap32(Register reg) { bswapl(reg); }

// ===============================================================
// Arithmetic instructions

void MacroAssembler::add32(Register src, Register dest) { addl(src, dest); }

void MacroAssembler::add32(Imm32 imm, Register dest) { addl(imm, dest); }

void MacroAssembler::add32(Imm32 imm, Register src, Register dest) {
  leal(Operand(src, imm.value), dest);
}

void MacroAssembler::add32(Imm32 imm, const Address& dest) {
  addl(imm, Operand(dest));
}

void MacroAssembler::add32(Imm32 imm, const AbsoluteAddress& dest) {
  addl(imm, Operand(dest));
}

void MacroAssembler::addFloat32(FloatRegister src, FloatRegister dest) {
  vaddss(src, dest, dest);
}

void MacroAssembler::addDouble(FloatRegister src, FloatRegister dest) {
  vaddsd(src, dest, dest);
}

void MacroAssembler::sub32(Register src, Register dest) { subl(src, dest); }

void MacroAssembler::sub32(Imm32 imm, Register dest) { subl(imm, dest); }

void MacroAssembler::sub32(const Address& src, Register dest) {
  subl(Operand(src), dest);
}

void MacroAssembler::subDouble(FloatRegister src, FloatRegister dest) {
  vsubsd(src, dest, dest);
}

void MacroAssembler::subFloat32(FloatRegister src, FloatRegister dest) {
  vsubss(src, dest, dest);
}

void MacroAssembler::mul32(Register rhs, Register srcDest) {
  imull(rhs, srcDest);
}

void MacroAssembler::mul32(Imm32 imm, Register srcDest) { imull(imm, srcDest); }

void MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest) {
  vmulss(src, dest, dest);
}

void MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest) {
  vmulsd(src, dest, dest);
}

void MacroAssembler::quotient32(Register rhs, Register srcDest,
                                Register tempEdx, bool isUnsigned) {
  MOZ_ASSERT(srcDest == eax && tempEdx == edx);

  // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit.
  if (isUnsigned) {
    mov(ImmWord(0), edx);
    udiv(rhs);
  } else {
    cdq();
    idiv(rhs);
  }
}

void MacroAssembler::remainder32(Register rhs, Register srcDest,
                                 Register tempEdx, bool isUnsigned) {
  MOZ_ASSERT(srcDest == eax && tempEdx == edx);

  // Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit.
  if (isUnsigned) {
    mov(ImmWord(0), edx);
    udiv(rhs);
  } else {
    cdq();
    idiv(rhs);
  }
  mov(edx, eax);
}

void MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest) {
  vdivss(src, dest, dest);
}

void MacroAssembler::divDouble(FloatRegister src, FloatRegister dest) {
  vdivsd(src, dest, dest);
}

void MacroAssembler::neg32(Register reg) { negl(reg); }

void MacroAssembler::negateFloat(FloatRegister reg) {
  ScratchFloat32Scope scratch(*this);
  vpcmpeqw(Operand(scratch), scratch, scratch);
  vpsllq(Imm32(31), scratch, scratch);

  // XOR the float in a float register with -0.0.
  vxorps(scratch, reg, reg);  // s ^ 0x80000000
}

void MacroAssembler::negateDouble(FloatRegister reg) {
  // From MacroAssemblerX86Shared::maybeInlineDouble
  ScratchDoubleScope scratch(*this);
  vpcmpeqw(Operand(scratch), scratch, scratch);
  vpsllq(Imm32(63), scratch, scratch);

  // XOR the float in a float register with -0.0.
  vxorpd(scratch, reg, reg);  // s ^ 0x80000000000000
}

void MacroAssembler::abs32(Register src, Register dest) {
  if (src != dest) {
    move32(src, dest);
  }
  Label positive;
  branchTest32(Assembler::NotSigned, dest, dest, &positive);
  neg32(dest);
  bind(&positive);
}

void MacroAssembler::absFloat32(FloatRegister src, FloatRegister dest) {
  ScratchFloat32Scope scratch(*this);
  loadConstantFloat32(mozilla::SpecificNaN<float>(
                          0, mozilla::FloatingPoint<float>::kSignificandBits),
                      scratch);
  vandps(scratch, src, dest);
}

void MacroAssembler::absDouble(FloatRegister src, FloatRegister dest) {
  ScratchDoubleScope scratch(*this);
  loadConstantDouble(mozilla::SpecificNaN<double>(
                         0, mozilla::FloatingPoint<double>::kSignificandBits),
                     scratch);
  vandpd(scratch, src, dest);
}

void MacroAssembler::sqrtFloat32(FloatRegister src, FloatRegister dest) {
  vsqrtss(src, dest, dest);
}

void MacroAssembler::sqrtDouble(FloatRegister src, FloatRegister dest) {
  vsqrtsd(src, dest, dest);
}

void MacroAssembler::minFloat32(FloatRegister other, FloatRegister srcDest,
                                bool handleNaN) {
  minMaxFloat32(srcDest, other, handleNaN, false);
}

void MacroAssembler::minDouble(FloatRegister other, FloatRegister srcDest,
                               bool handleNaN) {
  minMaxDouble(srcDest, other, handleNaN, false);
}

void MacroAssembler::maxFloat32(FloatRegister other, FloatRegister srcDest,
                                bool handleNaN) {
  minMaxFloat32(srcDest, other, handleNaN, true);
}

void MacroAssembler::maxDouble(FloatRegister other, FloatRegister srcDest,
                               bool handleNaN) {
  minMaxDouble(srcDest, other, handleNaN, true);
}

// ===============================================================
// Rotation instructions
void MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  count.value &= 0x1f;
  if (count.value) {
    roll(count, input);
  }
}

void MacroAssembler::rotateLeft(Register count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
  roll_cl(input);
}

void MacroAssembler::rotateRight(Imm32 count, Register input, Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  count.value &= 0x1f;
  if (count.value) {
    rorl(count, input);
  }
}

void MacroAssembler::rotateRight(Register count, Register input,
                                 Register dest) {
  MOZ_ASSERT(input == dest, "defineReuseInput");
  MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
  rorl_cl(input);
}

// ===============================================================
// Shift instructions

void MacroAssembler::lshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shlxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  shll_cl(srcDest);
}

void MacroAssembler::flexibleLshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shlxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    shll_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    shll_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::rshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shrxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  shrl_cl(srcDest);
}

void MacroAssembler::flexibleRshift32(Register shift, Register srcDest) {
  if (HasBMI2()) {
    shrxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    shrl_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    shrl_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::rshift32Arithmetic(Register shift, Register srcDest) {
  if (HasBMI2()) {
    sarxl(srcDest, shift, srcDest);
    return;
  }
  MOZ_ASSERT(shift == ecx);
  sarl_cl(srcDest);
}

void MacroAssembler::flexibleRshift32Arithmetic(Register shift,
                                                Register srcDest) {
  if (HasBMI2()) {
    sarxl(srcDest, shift, srcDest);
    return;
  }
  if (shift == ecx) {
    sarl_cl(srcDest);
  } else {
    // Shift amount must be in ecx.
    xchg(shift, ecx);
    sarl_cl(shift == srcDest ? ecx : srcDest == ecx ? shift : srcDest);
    xchg(shift, ecx);
  }
}

void MacroAssembler::lshift32(Imm32 shift, Register srcDest) {
  shll(shift, srcDest);
}

void MacroAssembler::rshift32(Imm32 shift, Register srcDest) {
  shrl(shift, srcDest);
}

void MacroAssembler::rshift32Arithmetic(Imm32 shift, Register srcDest) {
  sarl(shift, srcDest);
}

// ===============================================================
// Condition functions

void MacroAssembler::cmp8Set(Condition cond, Address lhs, Imm32 rhs,
                             Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp8(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

void MacroAssembler::cmp16Set(Condition cond, Address lhs, Imm32 rhs,
                              Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp16(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

template <typename T1, typename T2>
void MacroAssembler::cmp32Set(Condition cond, T1 lhs, T2 rhs, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(lhs, rhs, dest);
  cmp32(lhs, rhs);
  emitSet(cond, dest, destIsZero);
}

// ===============================================================
// Branch instructions

void MacroAssembler::branch8(Condition cond, const Address& lhs, Imm32 rhs,
                             Label* label) {
  cmp8(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch8(Condition cond, const BaseIndex& lhs, Register rhs,
                             Label* label) {
  cmp8(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch16(Condition cond, const Address& lhs, Imm32 rhs,
                              Label* label) {
  cmp16(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, Register lhs, Register rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs,
                              Register rhs, Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Operand& lhs, Register rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branch32(Condition cond, const Operand& lhs, Imm32 rhs,
                              Label* label) {
  cmp32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, Register rhs,
                               Label* label) {
  cmpPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, Imm32 rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmGCPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, Register lhs, ImmWord rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, Register rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs,
                               Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
                               ImmWord rhs, Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

void MacroAssembler::branchPtr(Condition cond, const BaseIndex& lhs,
                               Register rhs, Label* label) {
  branchPtrImpl(cond, lhs, rhs, label);
}

template <typename T, typename S, typename L>
void MacroAssembler::branchPtrImpl(Condition cond, const T& lhs, const S& rhs,
                                   L label) {
  cmpPtr(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branchFloat(DoubleCondition cond, FloatRegister lhs,
                                 FloatRegister rhs, Label* label) {
  compareFloat(cond, lhs, rhs);

  if (cond == DoubleEqual) {
    Label unordered;
    j(Parity, &unordered);
    j(Equal, label);
    bind(&unordered);
    return;
  }

  if (cond == DoubleNotEqualOrUnordered) {
    j(NotEqual, label);
    j(Parity, label);
    return;
  }

  MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
  j(ConditionFromDoubleCondition(cond), label);
}

void MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs,
                                  FloatRegister rhs, Label* label) {
  compareDouble(cond, lhs, rhs);

  if (cond == DoubleEqual) {
    Label unordered;
    j(Parity, &unordered);
    j(Equal, label);
    bind(&unordered);
    return;
  }
  if (cond == DoubleNotEqualOrUnordered) {
    j(NotEqual, label);
    j(Parity, label);
    return;
  }

  MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
  j(ConditionFromDoubleCondition(cond), label);
}

template <typename T>
void MacroAssembler::branchAdd32(Condition cond, T src, Register dest,
                                 Label* label) {
  addl(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchSub32(Condition cond, T src, Register dest,
                                 Label* label) {
  subl(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchMul32(Condition cond, T src, Register dest,
                                 Label* label) {
  mul32(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchRshift32(Condition cond, T src, Register dest,
                                    Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero);
  rshift32(src, dest);
  j(cond, label);
}

void MacroAssembler::branchNeg32(Condition cond, Register reg, Label* label) {
  MOZ_ASSERT(cond == Overflow);
  neg32(reg);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchAddPtr(Condition cond, T src, Register dest,
                                  Label* label) {
  addPtr(src, dest);
  j(cond, label);
}

template <typename T>
void MacroAssembler::branchSubPtr(Condition cond, T src, Register dest,
                                  Label* label) {
  subPtr(src, dest);
  j(cond, label);
}

void MacroAssembler::branchMulPtr(Condition cond, Register src, Register dest,
                                  Label* label) {
  mulPtr(src, dest);
  j(cond, label);
}

void MacroAssembler::branchNegPtr(Condition cond, Register reg, Label* label) {
  MOZ_ASSERT(cond == Overflow);
  negPtr(reg);
  j(cond, label);
}

void MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs,
                                  Label* label) {
  subPtr(rhs, lhs);
  j(cond, label);
}

void MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs,
                                  Label* label) {
  MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed ||
             cond == NotSigned);
  test32(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs,
                                   Label* label) {
  testPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, Register lhs, Imm32 rhs,
                                   Label* label) {
  testPtr(lhs, rhs);
  j(cond, label);
}

void MacroAssembler::branchTestPtr(Condition cond, const Address& lhs,
                                   Imm32 rhs, Label* label) {
  testPtr(Operand(lhs), rhs);
  j(cond, label);
}

void MacroAssembler::branchTestUndefined(Condition cond, Register tag,
                                         Label* label) {
  branchTestUndefinedImpl(cond, tag, label);
}

void MacroAssembler::branchTestUndefined(Condition cond, const Address& address,
                                         Label* label) {
  branchTestUndefinedImpl(cond, address, label);
}

void MacroAssembler::branchTestUndefined(Condition cond,
                                         const BaseIndex& address,
                                         Label* label) {
  branchTestUndefinedImpl(cond, address, label);
}

void MacroAssembler::branchTestUndefined(Condition cond,
                                         const ValueOperand& value,
                                         Label* label) {
  branchTestUndefinedImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestUndefinedImpl(Condition cond, const T& t,
                                             Label* label) {
  cond = testUndefined(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestInt32(Condition cond, Register tag,
                                     Label* label) {
  branchTestInt32Impl(cond, tag, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const Address& address,
                                     Label* label) {
  branchTestInt32Impl(cond, address, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const BaseIndex& address,
                                     Label* label) {
  branchTestInt32Impl(cond, address, label);
}

void MacroAssembler::branchTestInt32(Condition cond, const ValueOperand& value,
                                     Label* label) {
  branchTestInt32Impl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestInt32Impl(Condition cond, const T& t,
                                         Label* label) {
  cond = testInt32(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestInt32Truthy(bool truthy,
                                           const ValueOperand& value,
                                           Label* label) {
  Condition cond = testInt32Truthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestDouble(Condition cond, Register tag,
                                      Label* label) {
  branchTestDoubleImpl(cond, tag, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const Address& address,
                                      Label* label) {
  branchTestDoubleImpl(cond, address, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestDoubleImpl(cond, address, label);
}

void MacroAssembler::branchTestDouble(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestDoubleImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestDoubleImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testDouble(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestDoubleTruthy(bool truthy, FloatRegister reg,
                                            Label* label) {
  Condition cond = testDoubleTruthy(truthy, reg);
  j(cond, label);
}

void MacroAssembler::branchTestNumber(Condition cond, Register tag,
                                      Label* label) {
  branchTestNumberImpl(cond, tag, label);
}

void MacroAssembler::branchTestNumber(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestNumberImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestNumberImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testNumber(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, Register tag,
                                       Label* label) {
  branchTestBooleanImpl(cond, tag, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, const Address& address,
                                       Label* label) {
  branchTestBooleanImpl(cond, address, label);
}

void MacroAssembler::branchTestBoolean(Condition cond, const BaseIndex& address,
                                       Label* label) {
  branchTestBooleanImpl(cond, address, label);
}

void MacroAssembler::branchTestBoolean(Condition cond,
                                       const ValueOperand& value,
                                       Label* label) {
  branchTestBooleanImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestBooleanImpl(Condition cond, const T& t,
                                           Label* label) {
  cond = testBoolean(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestString(Condition cond, Register tag,
                                      Label* label) {
  branchTestStringImpl(cond, tag, label);
}

void MacroAssembler::branchTestString(Condition cond, const Address& address,
                                      Label* label) {
  branchTestStringImpl(cond, address, label);
}

void MacroAssembler::branchTestString(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestStringImpl(cond, address, label);
}

void MacroAssembler::branchTestString(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestStringImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestStringImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testString(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestStringTruthy(bool truthy,
                                            const ValueOperand& value,
                                            Label* label) {
  Condition cond = testStringTruthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, Register tag,
                                      Label* label) {
  branchTestSymbolImpl(cond, tag, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const Address& address,
                                      Label* label) {
  branchTestSymbolImpl(cond, address, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestSymbolImpl(cond, address, label);
}

void MacroAssembler::branchTestSymbol(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestSymbolImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestSymbolImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testSymbol(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, Register tag,
                                      Label* label) {
  branchTestBigIntImpl(cond, tag, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const Address& address,
                                      Label* label) {
  branchTestBigIntImpl(cond, address, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestBigIntImpl(cond, address, label);
}

void MacroAssembler::branchTestBigInt(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestBigIntImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestBigIntImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testBigInt(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestBigIntTruthy(bool truthy,
                                            const ValueOperand& value,
                                            Label* label) {
  Condition cond = testBigIntTruthy(truthy, value);
  j(cond, label);
}

void MacroAssembler::branchTestNull(Condition cond, Register tag,
                                    Label* label) {
  branchTestNullImpl(cond, tag, label);
}

void MacroAssembler::branchTestNull(Condition cond, const Address& address,
                                    Label* label) {
  branchTestNullImpl(cond, address, label);
}

void MacroAssembler::branchTestNull(Condition cond, const BaseIndex& address,
                                    Label* label) {
  branchTestNullImpl(cond, address, label);
}

void MacroAssembler::branchTestNull(Condition cond, const ValueOperand& value,
                                    Label* label) {
  branchTestNullImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestNullImpl(Condition cond, const T& t,
                                        Label* label) {
  cond = testNull(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestObject(Condition cond, Register tag,
                                      Label* label) {
  branchTestObjectImpl(cond, tag, label);
}

void MacroAssembler::branchTestObject(Condition cond, const Address& address,
                                      Label* label) {
  branchTestObjectImpl(cond, address, label);
}

void MacroAssembler::branchTestObject(Condition cond, const BaseIndex& address,
                                      Label* label) {
  branchTestObjectImpl(cond, address, label);
}

void MacroAssembler::branchTestObject(Condition cond, const ValueOperand& value,
                                      Label* label) {
  branchTestObjectImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestObjectImpl(Condition cond, const T& t,
                                          Label* label) {
  cond = testObject(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestGCThing(Condition cond, const Address& address,
                                       Label* label) {
  branchTestGCThingImpl(cond, address, label);
}

void MacroAssembler::branchTestGCThing(Condition cond, const BaseIndex& address,
                                       Label* label) {
  branchTestGCThingImpl(cond, address, label);
}

void MacroAssembler::branchTestGCThing(Condition cond,
                                       const ValueOperand& value,
                                       Label* label) {
  branchTestGCThingImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestGCThingImpl(Condition cond, const T& t,
                                           Label* label) {
  cond = testGCThing(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestPrimitive(Condition cond, Register tag,
                                         Label* label) {
  branchTestPrimitiveImpl(cond, tag, label);
}

void MacroAssembler::branchTestPrimitive(Condition cond,
                                         const ValueOperand& value,
                                         Label* label) {
  branchTestPrimitiveImpl(cond, value, label);
}

template <typename T>
void MacroAssembler::branchTestPrimitiveImpl(Condition cond, const T& t,
                                             Label* label) {
  cond = testPrimitive(cond, t);
  j(cond, label);
}

void MacroAssembler::branchTestMagic(Condition cond, Register tag,
                                     Label* label) {
  branchTestMagicImpl(cond, tag, label);
}

void MacroAssembler::branchTestMagic(Condition cond, const Address& address,
                                     Label* label) {
  branchTestMagicImpl(cond, address, label);
}

void MacroAssembler::branchTestMagic(Condition cond, const BaseIndex& address,
                                     Label* label) {
  branchTestMagicImpl(cond, address, label);
}

void MacroAssembler::branchTestMagic(Condition cond, const ValueOperand& value,
                                     Label* label) {
  branchTestMagicImpl(cond, value, label);
}

template <typename T, class L>
void MacroAssembler::branchTestMagicImpl(Condition cond, const T& t, L label) {
  cond = testMagic(cond, t);
  j(cond, label);
}

template <typename T>
void MacroAssembler::testNumberSet(Condition cond, const T& src,
                                   Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(src, dest);
  cond = testNumber(cond, src);
  emitSet(cond, dest, destIsZero);
}

template <typename T>
void MacroAssembler::testBooleanSet(Condition cond, const T& src,
                                    Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(src, dest);
  cond = testBoolean(cond, src);
  emitSet(cond, dest, destIsZero);
}

template <typename T>
void MacroAssembler::testStringSet(Condition cond, const T& src,
                                   Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(src, dest);
  cond = testString(cond, src);
  emitSet(cond, dest, destIsZero);
}

template <typename T>
void MacroAssembler::testSymbolSet(Condition cond, const T& src,
                                   Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(src, dest);
  cond = testSymbol(cond, src);
  emitSet(cond, dest, destIsZero);
}

template <typename T>
void MacroAssembler::testBigIntSet(Condition cond, const T& src,
                                   Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(src, dest);
  cond = testBigInt(cond, src);
  emitSet(cond, dest, destIsZero);
}

void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Imm32 rhs,
                                 Register src, Register dest) {
  cmp32(lhs, rhs);
  cmovCCl(cond, src, dest);
}

void MacroAssembler::cmp32Move32(Condition cond, Register lhs, Register rhs,
                                 Register src, Register dest) {
  cmp32(lhs, rhs);
  cmovCCl(cond, src, dest);
}

void MacroAssembler::cmp32Move32(Condition cond, Register lhs,
                                 const Address& rhs, Register src,
                                 Register dest) {
  cmp32(lhs, Operand(rhs));
  cmovCCl(cond, src, dest);
}

void MacroAssembler::cmp32Load32(Condition cond, Register lhs,
                                 const Address& rhs, const Address& src,
                                 Register dest) {
  cmp32(lhs, Operand(rhs));
  cmovCCl(cond, Operand(src), dest);
}

void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Register rhs,
                                 const Address& src, Register dest) {
  cmp32(lhs, rhs);
  cmovCCl(cond, Operand(src), dest);
}

void MacroAssembler::cmp32Load32(Condition cond, Register lhs, Imm32 rhs,
                                 const Address& src, Register dest) {
  cmp32(lhs, rhs);
  cmovCCl(cond, Operand(src), dest);
}

void MacroAssembler::spectreZeroRegister(Condition cond, Register scratch,
                                         Register dest) {
  // Note: use movl instead of move32/xorl to ensure flags are not clobbered.
  movl(Imm32(0), scratch);
  spectreMovePtr(cond, scratch, dest);
}

// ========================================================================
// Memory access primitives.
FaultingCodeOffset MacroAssembler::storeUncanonicalizedDouble(
    FloatRegister src, const Address& dest) {
  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  vmovsd(src, dest);
  return fco;
}
FaultingCodeOffset MacroAssembler::storeUncanonicalizedDouble(
    FloatRegister src, const BaseIndex& dest) {
  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  vmovsd(src, dest);
  return fco;
}
FaultingCodeOffset MacroAssembler::storeUncanonicalizedDouble(
    FloatRegister src, const Operand& dest) {
  switch (dest.kind()) {
    case Operand::MEM_REG_DISP:
      return storeUncanonicalizedDouble(src, dest.toAddress());
    case Operand::MEM_SCALE:
      return storeUncanonicalizedDouble(src, dest.toBaseIndex());
    default:
      MOZ_CRASH("unexpected operand kind");
  }
}

template FaultingCodeOffset MacroAssembler::storeDouble(FloatRegister src,
                                                        const Operand& dest);

FaultingCodeOffset MacroAssembler::storeUncanonicalizedFloat32(
    FloatRegister src, const Address& dest) {
  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  vmovss(src, dest);
  return fco;
}
FaultingCodeOffset MacroAssembler::storeUncanonicalizedFloat32(
    FloatRegister src, const BaseIndex& dest) {
  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  vmovss(src, dest);
  return fco;
}
FaultingCodeOffset MacroAssembler::storeUncanonicalizedFloat32(
    FloatRegister src, const Operand& dest) {
  switch (dest.kind()) {
    case Operand::MEM_REG_DISP:
      return storeUncanonicalizedFloat32(src, dest.toAddress());
    case Operand::MEM_SCALE:
      return storeUncanonicalizedFloat32(src, dest.toBaseIndex());
    default:
      MOZ_CRASH("unexpected operand kind");
  }
}

template FaultingCodeOffset MacroAssembler::storeFloat32(FloatRegister src,
                                                         const Operand& dest);

FaultingCodeOffset MacroAssembler::storeUncanonicalizedFloat16(
    FloatRegister src, const Address& dest, Register scratch) {
  vmovd(src, scratch);

  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  movw(scratch, Operand(dest));
  return fco;
}
FaultingCodeOffset MacroAssembler::storeUncanonicalizedFloat16(
    FloatRegister src, const BaseIndex& dest, Register scratch) {
  vmovd(src, scratch);

  FaultingCodeOffset fco = FaultingCodeOffset(currentOffset());
  movw(scratch, Operand(dest));
  return fco;
}

void MacroAssembler::memoryBarrier(MemoryBarrier barrier) {
  if (barrier.hasStoreLoad()) {
    // This implementation follows Linux.
    masm.mfence();
  }
}

// ========================================================================
// Wasm SIMD
//
// Some parts of the masm API are currently agnostic as to the data's
// interpretation as int or float, despite the Intel architecture having
// separate functional units and sometimes penalizing type-specific instructions
// that operate on data in the "wrong" unit.
//
// For the time being, we always choose the integer interpretation when we are
// forced to choose blind, but whether that is right or wrong depends on the
// application.  This applies to moveSimd128, loadConstantSimd128,
// loadUnalignedSimd128, and storeUnalignedSimd128, at least.
//
// SSE4.1 or better is assumed.
//
// The order of operations here follows the header file.

// Moves.  See comments above regarding integer operation.

void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::moveSimd128Int(src, dest);
}

// Constants.  See comments above regarding integer operation.

void MacroAssembler::loadConstantSimd128(const SimdConstant& v,
                                         FloatRegister dest) {
  if (v.isFloatingType()) {
    loadConstantSimd128Float(v, dest);
  } else {
    loadConstantSimd128Int(v, dest);
  }
}

// Splat

void MacroAssembler::splatX16(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX16(src, dest);
}

void MacroAssembler::splatX8(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX8(src, dest);
}

void MacroAssembler::splatX4(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

void MacroAssembler::splatX2(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX2(src, dest);
}

// Extract lane as scalar

void MacroAssembler::extractLaneInt8x16(uint32_t lane, FloatRegister src,
                                        Register dest) {
  MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane,
                                              SimdSign::Signed);
}

void MacroAssembler::unsignedExtractLaneInt8x16(uint32_t lane,
                                                FloatRegister src,
                                                Register dest) {
  MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane,
                                              SimdSign::Unsigned);
}

void MacroAssembler::extractLaneInt16x8(uint32_t lane, FloatRegister src,
                                        Register dest) {
  MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane,
                                              SimdSign::Signed);
}

void MacroAssembler::unsignedExtractLaneInt16x8(uint32_t lane,
                                                FloatRegister src,
                                                Register dest) {
  MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane,
                                              SimdSign::Unsigned);
}

void MacroAssembler::extractLaneInt32x4(uint32_t lane, FloatRegister src,
                                        Register dest) {
  MacroAssemblerX86Shared::extractLaneInt32x4(src, dest, lane);
}

void MacroAssembler::extractLaneFloat32x4(uint32_t lane, FloatRegister src,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::extractLaneFloat32x4(src, dest, lane);
}

void MacroAssembler::extractLaneFloat64x2(uint32_t lane, FloatRegister src,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::extractLaneFloat64x2(src, dest, lane);
}

// Replace lane value

void MacroAssembler::replaceLaneInt8x16(unsigned lane, FloatRegister lhs,
                                        Register rhs, FloatRegister dest) {
  vpinsrb(lane, Operand(rhs), lhs, dest);
}

void MacroAssembler::replaceLaneInt8x16(unsigned lane, Register rhs,
                                        FloatRegister lhsDest) {
  vpinsrb(lane, Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::replaceLaneInt16x8(unsigned lane, FloatRegister lhs,
                                        Register rhs, FloatRegister dest) {
  vpinsrw(lane, Operand(rhs), lhs, dest);
}

void MacroAssembler::replaceLaneInt16x8(unsigned lane, Register rhs,
                                        FloatRegister lhsDest) {
  vpinsrw(lane, Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::replaceLaneInt32x4(unsigned lane, FloatRegister lhs,
                                        Register rhs, FloatRegister dest) {
  vpinsrd(lane, rhs, lhs, dest);
}

void MacroAssembler::replaceLaneInt32x4(unsigned lane, Register rhs,
                                        FloatRegister lhsDest) {
  vpinsrd(lane, rhs, lhsDest, lhsDest);
}

void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister lhs,
                                          FloatRegister rhs,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhs, rhs, dest);
}

void MacroAssembler::replaceLaneFloat32x4(unsigned lane, FloatRegister rhs,
                                          FloatRegister lhsDest) {
  MacroAssemblerX86Shared::replaceLaneFloat32x4(lane, lhsDest, rhs, lhsDest);
}

void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister lhs,
                                          FloatRegister rhs,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhs, rhs, dest);
}

void MacroAssembler::replaceLaneFloat64x2(unsigned lane, FloatRegister rhs,
                                          FloatRegister lhsDest) {
  MacroAssemblerX86Shared::replaceLaneFloat64x2(lane, lhsDest, rhs, lhsDest);
}

// Shuffle - permute with immediate indices

void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister rhs,
                                    FloatRegister lhsDest) {
  MacroAssemblerX86Shared::shuffleInt8x16(lhsDest, rhs, lhsDest, lanes);
}

void MacroAssembler::shuffleInt8x16(const uint8_t lanes[16], FloatRegister lhs,
                                    FloatRegister rhs, FloatRegister dest) {
  MacroAssemblerX86Shared::shuffleInt8x16(lhs, rhs, dest, lanes);
}

void MacroAssembler::blendInt8x16(const uint8_t lanes[16], FloatRegister lhs,
                                  FloatRegister rhs, FloatRegister dest,
                                  FloatRegister temp) {
  MacroAssemblerX86Shared::blendInt8x16(lhs, rhs, dest, temp, lanes);
}

void MacroAssembler::blendInt16x8(const uint16_t lanes[8], FloatRegister lhs,
                                  FloatRegister rhs, FloatRegister dest) {
  MacroAssemblerX86Shared::blendInt16x8(lhs, rhs, dest, lanes);
}

void MacroAssembler::laneSelectSimd128(FloatRegister mask, FloatRegister lhs,
                                       FloatRegister rhs, FloatRegister dest) {
  MacroAssemblerX86Shared::laneSelectSimd128(mask, lhs, rhs, dest);
}

void MacroAssembler::interleaveHighInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpunpckhwd(rhs, lhs, dest);
}

void MacroAssembler::interleaveHighInt32x4(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpunpckhdq(rhs, lhs, dest);
}

void MacroAssembler::interleaveHighInt64x2(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpunpckhqdq(rhs, lhs, dest);
}

void MacroAssembler::interleaveHighInt8x16(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpunpckhbw(rhs, lhs, dest);
}

void MacroAssembler::interleaveLowInt16x8(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpunpcklwd(rhs, lhs, dest);
}

void MacroAssembler::interleaveLowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpunpckldq(rhs, lhs, dest);
}

void MacroAssembler::interleaveLowInt64x2(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpunpcklqdq(rhs, lhs, dest);
}

void MacroAssembler::interleaveLowInt8x16(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpunpcklbw(rhs, lhs, dest);
}

void MacroAssembler::permuteInt8x16(const uint8_t lanes[16], FloatRegister src,
                                    FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest);
}

void MacroAssembler::permuteLowInt16x8(const uint16_t lanes[4],
                                       FloatRegister src, FloatRegister dest) {
  MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4);
  vpshuflw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src,
           dest);
}

void MacroAssembler::permuteHighInt16x8(const uint16_t lanes[4],
                                        FloatRegister src, FloatRegister dest) {
  MOZ_ASSERT(lanes[0] < 4 && lanes[1] < 4 && lanes[2] < 4 && lanes[3] < 4);
  vpshufhw(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src,
           dest);
}

void MacroAssembler::permuteInt32x4(const uint32_t lanes[4], FloatRegister src,
                                    FloatRegister dest) {
  vpshufd(ComputeShuffleMask(lanes[0], lanes[1], lanes[2], lanes[3]), src,
          dest);
}

void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs,
                                                FloatRegister rhs,
                                                FloatRegister dest,
                                                uint32_t shift) {
  vpalignr(Operand(rhs), lhs, dest, shift);
}

void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpslldq(count, src, dest);
}

void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrldq(count, src, dest);
}

// Zero extend int values.

void MacroAssembler::zeroExtend8x16To16x8(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxbw(Operand(src), dest);
}

void MacroAssembler::zeroExtend8x16To32x4(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxbd(Operand(src), dest);
}

void MacroAssembler::zeroExtend8x16To64x2(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxbq(Operand(src), dest);
}

void MacroAssembler::zeroExtend16x8To32x4(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxwd(Operand(src), dest);
}

void MacroAssembler::zeroExtend16x8To64x2(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxwq(Operand(src), dest);
}

void MacroAssembler::zeroExtend32x4To64x2(FloatRegister src,
                                          FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmovzxdq(Operand(src), dest);
}

// Reverse bytes in lanes.

void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) {
  // Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.
  ScratchSimd128Scope scratch(*this);
  FloatRegister srcForScratch = moveSimd128IntIfNotAVX(src, scratch);
  vpsrlw(Imm32(8), srcForScratch, scratch);
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsllw(Imm32(8), src, dest);
  vpor(scratch, dest, dest);
}

void MacroAssembler::reverseInt32x4(FloatRegister src, FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  int8_t lanes[] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};
  vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest);
}

void MacroAssembler::reverseInt64x2(FloatRegister src, FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  int8_t lanes[] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
  vpshufbSimd128(SimdConstant::CreateX16((const int8_t*)lanes), src, dest);
}

// Any lane true, ie any bit set

void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(dest);

  vptest(src, src);
  emitSet(Condition::NonZero, dest, destIsZero);
}

// All lanes true

void MacroAssembler::allTrueInt8x16(FloatRegister src, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(dest);

  ScratchSimd128Scope xtmp(*this);
  // xtmp is all-00h
  vpxor(xtmp, xtmp, xtmp);
  // Set FFh if byte==0 otherwise 00h
  // Operand ordering constraint: lhs==output
  vpcmpeqb(Operand(src), xtmp, xtmp);
  // Check if xtmp is 0.
  vptest(xtmp, xtmp);
  emitSet(Condition::Zero, dest, destIsZero);
}

void MacroAssembler::allTrueInt16x8(FloatRegister src, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(dest);

  ScratchSimd128Scope xtmp(*this);
  // xtmp is all-00h
  vpxor(xtmp, xtmp, xtmp);
  // Set FFFFh if word==0 otherwise 0000h
  // Operand ordering constraint: lhs==output
  vpcmpeqw(Operand(src), xtmp, xtmp);
  // Check if xtmp is 0.
  vptest(xtmp, xtmp);
  emitSet(Condition::Zero, dest, destIsZero);
}

void MacroAssembler::allTrueInt32x4(FloatRegister src, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(dest);

  ScratchSimd128Scope xtmp(*this);
  // xtmp is all-00h
  vpxor(xtmp, xtmp, xtmp);
  // Set FFFFFFFFh if doubleword==0 otherwise 00000000h
  // Operand ordering constraint: lhs==output
  vpcmpeqd(Operand(src), xtmp, xtmp);
  // Check if xtmp is 0.
  vptest(xtmp, xtmp);
  emitSet(Condition::Zero, dest, destIsZero);
}

void MacroAssembler::allTrueInt64x2(FloatRegister src, Register dest) {
  bool destIsZero = maybeEmitSetZeroByteRegister(dest);

  ScratchSimd128Scope xtmp(*this);
  // xtmp is all-00h
  vpxor(xtmp, xtmp, xtmp);
  // Set FFFFFFFFFFFFFFFFh if quadword==0 otherwise 0000000000000000h
  // Operand ordering constraint: lhs==output
  vpcmpeqq(Operand(src), xtmp, xtmp);
  // Check if xtmp is 0.
  vptest(xtmp, xtmp);
  emitSet(Condition::Zero, dest, destIsZero);
}

// Bitmask

void MacroAssembler::bitmaskInt8x16(FloatRegister src, Register dest) {
  vpmovmskb(src, dest);
}

void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest) {
  ScratchSimd128Scope scratch(*this);
  // A three-instruction sequence is possible by using scratch as a don't-care
  // input and shifting rather than masking at the end, but creates a false
  // dependency on the old value of scratch.  The better fix is to allow src to
  // be clobbered.
  src = moveSimd128IntIfNotAVX(src, scratch);
  vpacksswb(Operand(src), src, scratch);
  vpmovmskb(scratch, dest);
  andl(Imm32(0xFF), dest);
}

void MacroAssembler::bitmaskInt32x4(FloatRegister src, Register dest) {
  vmovmskps(src, dest);
}

void MacroAssembler::bitmaskInt64x2(FloatRegister src, Register dest) {
  vmovmskpd(src, dest);
}

// Swizzle - permute with variable indices

void MacroAssembler::swizzleInt8x16(FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  rhs = moveSimd128IntIfNotAVX(rhs, scratch);
  // Set high bit to 1 for values > 15 via adding with saturation.
  vpaddusbSimd128(SimdConstant::SplatX16(0x70), rhs, scratch);
  vpshufb(scratch, lhs, dest);  // permute
}

void MacroAssembler::swizzleInt8x16Relaxed(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpshufb(rhs, lhs, dest);
}

// Integer Add

void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddb(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddb,
                &MacroAssembler::vpaddbSimd128);
}

void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddw(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddw,
                &MacroAssembler::vpaddwSimd128);
}

void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddd(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddd,
                &MacroAssembler::vpadddSimd128);
}

void MacroAssembler::addInt64x2(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddq(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt64x2(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddq,
                &MacroAssembler::vpaddqSimd128);
}

// Integer subtract

void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubb(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubb,
                &MacroAssembler::vpsubbSimd128);
}

void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubw(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubw,
                &MacroAssembler::vpsubwSimd128);
}

void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubd(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubd,
                &MacroAssembler::vpsubdSimd128);
}

void MacroAssembler::subInt64x2(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubq(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt64x2(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubq,
                &MacroAssembler::vpsubqSimd128);
}

// Integer multiply

void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmullw(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmullw,
                &MacroAssembler::vpmullwSimd128);
}

void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmulld(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmulld,
                &MacroAssembler::vpmulldSimd128);
}

void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest, FloatRegister temp) {
  ScratchSimd128Scope temp2(*this);
  // lhs = <D C> <B A>
  // rhs = <H G> <F E>
  // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>
  FloatRegister lhsForTemp =
      moveSimd128IntIfNotAVX(lhs, temp);  // temp  = <D C> <B A>
  vpsrlq(Imm32(32), lhsForTemp, temp);    // temp  = <0 D> <0 B>
  vpmuludq(rhs, temp, temp);              // temp  = <DG> <BE>
  FloatRegister rhsForTemp =
      moveSimd128IntIfNotAVX(rhs, temp2);  // temp2 = <H G> <F E>
  vpsrlq(Imm32(32), rhsForTemp, temp2);    // temp2 = <0 H> <0 F>
  vpmuludq(lhs, temp2, temp2);             // temp2 = <CH> <AF>
  vpaddq(Operand(temp), temp2, temp2);     // temp2 = <DG+CH> <BE+AF>
  vpsllq(Imm32(32), temp2, temp2);         // temp2 = <(DG+CH)_low 0>
                                           //         <(BE+AF)_low 0>
  vpmuludq(rhs, lhs, dest);                // dest = <CG_high CG_low>
                                           //        <AE_high AE_low>
  vpaddq(Operand(temp2), dest, dest);      // dest =
                                           //    <(DG+CH)_low+CG_high CG_low>
                                           //    <(BE+AF)_low+AE_high AE_low>
}

void MacroAssembler::mulInt64x2(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest, FloatRegister temp) {
  // Check if we can specialize that to less than eight instructions
  // (in comparison with the above mulInt64x2 version).
  const int64_t* c = static_cast<const int64_t*>(rhs.bytes());
  const int64_t val = c[0];
  if (val == c[1]) {
    switch (mozilla::CountPopulation64(val)) {
      case 0:  // val == 0
        vpxor(Operand(dest), dest, dest);
        return;
      case 64:  // val == -1
        negInt64x2(lhs, dest);
        return;
      case 1:  // val == power of 2
        if (val == 1) {
          moveSimd128Int(lhs, dest);
        } else {
          lhs = moveSimd128IntIfNotAVX(lhs, dest);
          vpsllq(Imm32(mozilla::CountTrailingZeroes64(val)), lhs, dest);
        }
        return;
      case 2: {
        // Constants with 2 bits set, such as 3, 5, 10, etc.
        int i0 = mozilla::CountTrailingZeroes64(val);
        int i1 = mozilla::CountTrailingZeroes64(val & (val - 1));
        FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
        vpsllq(Imm32(i1), lhsForTemp, temp);
        lhs = moveSimd128IntIfNotAVX(lhs, dest);
        if (i0 > 0) {
          vpsllq(Imm32(i0), lhs, dest);
          lhs = dest;
        }
        vpaddq(Operand(temp), lhs, dest);
        return;
      }
      case 63: {
        // Some constants with 1 bit unset, such as -2, -3, -5, etc.
        FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
        vpsllq(Imm32(mozilla::CountTrailingZeroes64(~val)), lhsForTemp, temp);
        negInt64x2(lhs, dest);
        vpsubq(Operand(temp), dest, dest);
        return;
      }
    }
  }

  // lhs = <D C> <B A>
  // rhs = <H G> <F E>
  // result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>

  if ((c[0] >> 32) == 0 && (c[1] >> 32) == 0) {
    // If the H and F == 0, simplify calculations:
    //   result = <DG_low+CG_high CG_low> <BE_low+AE_high AE_low>
    const int64_t rhsShifted[2] = {c[0] << 32, c[1] << 32};
    FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
    vpmulldSimd128(SimdConstant::CreateSimd128(rhsShifted), lhsForTemp, temp);
    vpmuludqSimd128(rhs, lhs, dest);
    vpaddq(Operand(temp), dest, dest);
    return;
  }

  const int64_t rhsSwapped[2] = {
      static_cast<int64_t>(static_cast<uint64_t>(c[0]) >> 32) | (c[0] << 32),
      static_cast<int64_t>(static_cast<uint64_t>(c[1]) >> 32) | (c[1] << 32),
  };  // rhsSwapped = <G H> <E F>
  FloatRegister lhsForTemp = moveSimd128IntIfNotAVX(lhs, temp);
  vpmulldSimd128(SimdConstant::CreateSimd128(rhsSwapped), lhsForTemp,
                 temp);                // temp = <DG CH> <BE AF>
  vphaddd(Operand(temp), temp, temp);  // temp = <xx xx> <DG+CH BE+AF>
  vpmovzxdq(Operand(temp), temp);      // temp = <0 DG+CG> <0 BE+AF>
  vpmuludqSimd128(rhs, lhs, dest);     // dest = <CG_high CG_low>
                                       //        <AE_high AE_low>
  vpsllq(Imm32(32), temp, temp);       // temp = <(DG+CH)_low 0>
                                       //        <(BE+AF)_low 0>
  vpaddq(Operand(temp), dest, dest);
}

// Code generation from the PR: https://github.com/WebAssembly/simd/pull/376.
// The double PSHUFD for the 32->64 case is not great, and there's some
// discussion on the PR (scroll down far enough) on how to avoid one of them,
// but we need benchmarking + correctness proofs.

void MacroAssembler::extMulLowInt8x16(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  widenLowInt8x16(rhs, scratch);
  widenLowInt8x16(lhs, dest);
  mulInt16x8(dest, scratch, dest);
}

void MacroAssembler::extMulHighInt8x16(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  widenHighInt8x16(rhs, scratch);
  widenHighInt8x16(lhs, dest);
  mulInt16x8(dest, scratch, dest);
}

void MacroAssembler::unsignedExtMulLowInt8x16(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  unsignedWidenLowInt8x16(rhs, scratch);
  unsignedWidenLowInt8x16(lhs, dest);
  mulInt16x8(dest, scratch, dest);
}

void MacroAssembler::unsignedExtMulHighInt8x16(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  unsignedWidenHighInt8x16(rhs, scratch);
  unsignedWidenHighInt8x16(lhs, dest);
  mulInt16x8(dest, scratch, dest);
}

void MacroAssembler::extMulLowInt16x8(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  vpmulhw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpcklwd(scratch, dest, dest);
}

void MacroAssembler::extMulHighInt16x8(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  vpmulhw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpckhwd(scratch, dest, dest);
}

void MacroAssembler::unsignedExtMulLowInt16x8(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  vpmulhuw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpcklwd(scratch, dest, dest);
}

void MacroAssembler::unsignedExtMulHighInt16x8(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  FloatRegister lhsCopy = moveSimd128IntIfNotAVX(lhs, scratch);
  vpmulhuw(Operand(rhs), lhsCopy, scratch);
  vpmullw(Operand(rhs), lhs, dest);
  vpunpckhwd(scratch, dest, dest);
}

void MacroAssembler::extMulLowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
  vpmuldq(scratch, dest, dest);
}

void MacroAssembler::extMulHighInt32x4(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
  vpmuldq(scratch, dest, dest);
}

void MacroAssembler::unsignedExtMulLowInt32x4(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(0, 0, 1, 0), rhs, dest);
  vpmuludq(Operand(scratch), dest, dest);
}

void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), lhs, scratch);
  vpshufd(ComputeShuffleMask(2, 0, 3, 0), rhs, dest);
  vpmuludq(Operand(scratch), dest, dest);
}

void MacroAssembler::q15MulrSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  vpmulhrsw(Operand(rhs), lhs, dest);
  FloatRegister destCopy = moveSimd128IntIfNotAVX(dest, scratch);
  vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), destCopy, scratch);
  vpxor(scratch, dest, dest);
}

void MacroAssembler::q15MulrInt16x8Relaxed(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpmulhrsw(Operand(rhs), lhs, dest);
}

// Integer negate

void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubb(Operand(src), dest, dest);
}

void MacroAssembler::negInt16x8(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubw(Operand(src), dest, dest);
}

void MacroAssembler::negInt32x4(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubd(Operand(src), dest, dest);
}

void MacroAssembler::negInt64x2(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (src == dest) {
    moveSimd128Int(src, scratch);
    src = scratch;
  }
  vpxor(Operand(dest), dest, dest);
  vpsubq(Operand(src), dest, dest);
}

// Saturating integer add

void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpaddsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::addSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsb,
                &MacroAssembler::vpaddsbSimd128);
}

void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpaddusb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAddSatInt8x16(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusb,
                &MacroAssembler::vpaddusbSimd128);
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpaddsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddsw,
                &MacroAssembler::vpaddswSimd128);
}

void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpaddusw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAddSatInt16x8(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpaddusw,
                &MacroAssembler::vpadduswSimd128);
}

// Saturating integer subtract

void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpsubsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::subSatInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsb,
                &MacroAssembler::vpsubsbSimd128);
}

void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpsubusb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedSubSatInt8x16(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusb,
                &MacroAssembler::vpsubusbSimd128);
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpsubsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubsw,
                &MacroAssembler::vpsubswSimd128);
}

void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpsubusw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedSubSatInt16x8(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpsubusw,
                &MacroAssembler::vpsubuswSimd128);
}

// Lane-wise integer minimum

void MacroAssembler::minInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsb,
                &MacroAssembler::vpminsbSimd128);
}

void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminub(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt8x16(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminub,
                &MacroAssembler::vpminubSimd128);
}

void MacroAssembler::minInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsw,
                &MacroAssembler::vpminswSimd128);
}

void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminuw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt16x8(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminuw,
                &MacroAssembler::vpminuwSimd128);
}

void MacroAssembler::minInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpminsd(Operand(rhs), lhs, dest);
}

void MacroAssembler::minInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminsd,
                &MacroAssembler::vpminsdSimd128);
}

void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpminud(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMinInt32x4(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpminud,
                &MacroAssembler::vpminudSimd128);
}

// Lane-wise integer maximum

void MacroAssembler::maxInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsb(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt8x16(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsb,
                &MacroAssembler::vpmaxsbSimd128);
}

void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxub(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt8x16(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxub,
                &MacroAssembler::vpmaxubSimd128);
}

void MacroAssembler::maxInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsw(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsw,
                &MacroAssembler::vpmaxswSimd128);
}

void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxuw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt16x8(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxuw,
                &MacroAssembler::vpmaxuwSimd128);
}

void MacroAssembler::maxInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmaxsd(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxsd,
                &MacroAssembler::vpmaxsdSimd128);
}

void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vpmaxud(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedMaxInt32x4(FloatRegister lhs,
                                        const SimdConstant& rhs,
                                        FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaxud,
                &MacroAssembler::vpmaxudSimd128);
}

// Lane-wise integer rounding average

void MacroAssembler::unsignedAverageInt8x16(FloatRegister lhs,
                                            FloatRegister rhs,
                                            FloatRegister dest) {
  vpavgb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedAverageInt16x8(FloatRegister lhs,
                                            FloatRegister rhs,
                                            FloatRegister dest) {
  vpavgw(Operand(rhs), lhs, dest);
}

// Lane-wise integer absolute value

void MacroAssembler::absInt8x16(FloatRegister src, FloatRegister dest) {
  vpabsb(Operand(src), dest);
}

void MacroAssembler::absInt16x8(FloatRegister src, FloatRegister dest) {
  vpabsw(Operand(src), dest);
}

void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {
  vpabsd(Operand(src), dest);
}

void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  signReplicationInt64x2(src, scratch);
  src = moveSimd128IntIfNotAVX(src, dest);
  vpxor(Operand(scratch), src, dest);
  vpsubq(Operand(scratch), dest, dest);
}

// Left shift by scalar

void MacroAssembler::leftShiftInt8x16(Register rhs, FloatRegister lhsDest,
                                      FloatRegister temp) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(lhsDest, rhs, temp,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(count, src, dest);
}

void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsllw(count, src, dest);
}

void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpslld(count, src, dest);
}

void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2(lhsDest, rhs,
                                                          lhsDest);
}

void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
                                      FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsllq(count, src, dest);
}

// Right shift by scalar

void MacroAssembler::rightShiftInt8x16(Register rhs, FloatRegister lhsDest,
                                       FloatRegister temp) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(lhsDest, rhs, temp,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt8x16(Register rhs,
                                               FloatRegister lhsDest,
                                               FloatRegister temp) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16(
      lhsDest, rhs, temp, lhsDest);
}

void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16(count, src,
                                                                   dest);
}

void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(lhsDest, rhs,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsraw(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt16x8(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrlw(count, src, dest);
}

void MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(lhsDest, rhs,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrad(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt32x4(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrld(count, src, dest);
}

void MacroAssembler::rightShiftInt64x2(Register rhs, FloatRegister lhsDest,
                                       FloatRegister temp) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(lhsDest, rhs, temp,
                                                           lhsDest);
}

void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(count, src, dest);
}

void MacroAssembler::unsignedRightShiftInt64x2(Register rhs,
                                               FloatRegister lhsDest) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2(lhsDest, rhs,
                                                                   lhsDest);
}

void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
                                               FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrlq(count, src, dest);
}

// Sign replication operation

void MacroAssembler::signReplicationInt8x16(FloatRegister src,
                                            FloatRegister dest) {
  MOZ_ASSERT(src != dest);
  vpxor(Operand(dest), dest, dest);
  vpcmpgtb(Operand(src), dest, dest);
}

void MacroAssembler::signReplicationInt16x8(FloatRegister src,
                                            FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsraw(Imm32(15), src, dest);
}

void MacroAssembler::signReplicationInt32x4(FloatRegister src,
                                            FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpsrad(Imm32(31), src, dest);
}

void MacroAssembler::signReplicationInt64x2(FloatRegister src,
                                            FloatRegister dest) {
  vpshufd(ComputeShuffleMask(1, 1, 3, 3), src, dest);
  vpsrad(Imm32(31), dest, dest);
}

// Bitwise and, or, xor, not

void MacroAssembler::bitwiseAndSimd128(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  vpand(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  vpand(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs,
                                       const SimdConstant& rhs,
                                       FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpand,
                &MacroAssembler::vpandSimd128);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister rhs,
                                      FloatRegister lhsDest) {
  vpor(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  vpor(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpor,
                &MacroAssembler::vporSimd128);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  vpxor(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  vpxor(Operand(rhs), lhs, dest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs,
                                       const SimdConstant& rhs,
                                       FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpxor,
                &MacroAssembler::vpxorSimd128);
}

void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX16(-1), dest);
}

// Bitwise and-not

void MacroAssembler::bitwiseNotAndSimd128(FloatRegister rhs,
                                          FloatRegister lhsDest) {
  vpandn(Operand(rhs), lhsDest, lhsDest);
}

void MacroAssembler::bitwiseNotAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  vpandn(Operand(rhs), lhs, dest);
}

// Bitwise select

void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
                                          FloatRegister onTrue,
                                          FloatRegister onFalse,
                                          FloatRegister dest,
                                          FloatRegister temp) {
  MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
}

// Population count

void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest,
                                   FloatRegister temp) {
  MacroAssemblerX86Shared::popcntInt8x16(src, temp, dest);
}

// Comparisons (integer and floating-point)

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt8x16(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt8x16(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt8x16(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt8x16(cond, lhs, rhs, dest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt16x8(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt16x8(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt16x8(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt16x8(cond, lhs, rhs, dest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister rhs, FloatRegister lhsDest) {
  MacroAssemblerX86Shared::compareInt32x4(lhsDest, Operand(rhs), cond, lhsDest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt32x4(Assembler::Condition cond,
                                    FloatRegister lhs, const SimdConstant& rhs,
                                    FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::LessThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareInt32x4(cond, lhs, rhs, dest);
}

void MacroAssembler::compareForEqualityInt64x2(Assembler::Condition cond,
                                               FloatRegister lhs,
                                               FloatRegister rhs,
                                               FloatRegister dest) {
  MacroAssemblerX86Shared::compareForEqualityInt64x2(lhs, Operand(rhs), cond,
                                                     dest);
}

void MacroAssembler::compareForOrderingInt64x2(
    Assembler::Condition cond, FloatRegister lhs, FloatRegister rhs,
    FloatRegister dest, FloatRegister temp1, FloatRegister temp2) {
  if (HasAVX() && HasSSE42()) {
    MacroAssemblerX86Shared::compareForOrderingInt64x2AVX(lhs, rhs, cond, dest);
  } else {
    MacroAssemblerX86Shared::compareForOrderingInt64x2(lhs, Operand(rhs), cond,
                                                       temp1, temp2, dest);
  }
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister rhs,
                                      FloatRegister lhsDest) {
  // Code in the SIMD implementation allows operands to be reversed like this,
  // this benefits the baseline compiler.  Ion takes care of the reversing
  // itself and never generates GT/GE.
  if (cond == Assembler::GreaterThan) {
    MacroAssemblerX86Shared::compareFloat32x4(rhs, Operand(lhsDest),
                                              Assembler::LessThan, lhsDest);
  } else if (cond == Assembler::GreaterThanOrEqual) {
    MacroAssemblerX86Shared::compareFloat32x4(
        rhs, Operand(lhsDest), Assembler::LessThanOrEqual, lhsDest);
  } else {
    MacroAssemblerX86Shared::compareFloat32x4(lhsDest, Operand(rhs), cond,
                                              lhsDest);
  }
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::compareFloat32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareFloat32x4(Assembler::Condition cond,
                                      FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::GreaterThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareFloat32x4(cond, lhs, rhs, dest);
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister rhs,
                                      FloatRegister lhsDest) {
  compareFloat64x2(cond, lhsDest, rhs, lhsDest);
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  // Code in the SIMD implementation allows operands to be reversed like this,
  // this benefits the baseline compiler.  Ion takes care of the reversing
  // itself and never generates GT/GE.
  if (cond == Assembler::GreaterThan) {
    MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs),
                                              Assembler::LessThan, dest);
  } else if (cond == Assembler::GreaterThanOrEqual) {
    MacroAssemblerX86Shared::compareFloat64x2(rhs, Operand(lhs),
                                              Assembler::LessThanOrEqual, dest);
  } else {
    MacroAssemblerX86Shared::compareFloat64x2(lhs, Operand(rhs), cond, dest);
  }
}

void MacroAssembler::compareFloat64x2(Assembler::Condition cond,
                                      FloatRegister lhs,
                                      const SimdConstant& rhs,
                                      FloatRegister dest) {
  MOZ_ASSERT(cond != Assembler::Condition::GreaterThan &&
             cond != Assembler::Condition::GreaterThanOrEqual);
  MacroAssemblerX86Shared::compareFloat64x2(cond, lhs, rhs, dest);
}

// Load.  See comments above regarding integer operation.

void MacroAssembler::loadUnalignedSimd128(const Operand& src,
                                          FloatRegister dest) {
  loadUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const Address& src,
                                                        FloatRegister dest) {
  return loadUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const BaseIndex& src,
                                                        FloatRegister dest) {
  return loadUnalignedSimd128Int(src, dest);
}

// Store.  See comments above regarding integer operation.

FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                                         const Address& dest) {
  return storeUnalignedSimd128Int(src, dest);
}

FaultingCodeOffset MacroAssembler::storeUnalignedSimd128(
    FloatRegister src, const BaseIndex& dest) {
  return storeUnalignedSimd128Int(src, dest);
}

// Floating point negation

void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX4(-0.f), dest);
}

void MacroAssembler::negFloat64x2(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseXorSimd128(src, SimdConstant::SplatX2(-0.0), dest);
}

// Floating point absolute value

void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseAndSimd128(src, SimdConstant::SplatX4(0x7FFFFFFF), dest);
}

void MacroAssembler::absFloat64x2(FloatRegister src, FloatRegister dest) {
  src = moveSimd128FloatIfNotAVX(src, dest);
  bitwiseAndSimd128(src, SimdConstant::SplatX2(int64_t(0x7FFFFFFFFFFFFFFFll)),
                    dest);
}

// NaN-propagating minimum

void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::minFloat32x4(lhs, rhs, temp1, temp2, dest);
}

void MacroAssembler::minFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::minFloat64x2(lhs, rhs, temp1, temp2, dest);
}

// NaN-propagating maximum

void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::maxFloat32x4(lhs, rhs, temp1, temp2, dest);
}

void MacroAssembler::maxFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest, FloatRegister temp1,
                                  FloatRegister temp2) {
  MacroAssemblerX86Shared::maxFloat64x2(lhs, rhs, temp1, temp2, dest);
}

// Compare-based minimum

void MacroAssembler::pseudoMinFloat32x4(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  // Shut up the linter by using the same names as in the declaration, then
  // aliasing here.
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vminps(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMinFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vminps(Operand(rhs), lhs, dest);
}

void MacroAssembler::pseudoMinFloat64x2(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vminpd(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMinFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vminpd(Operand(rhs), lhs, dest);
}

// Compare-based maximum

void MacroAssembler::pseudoMaxFloat32x4(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vmaxps(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMaxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vmaxps(Operand(rhs), lhs, dest);
}

void MacroAssembler::pseudoMaxFloat64x2(FloatRegister rhsOrRhsDest,
                                        FloatRegister lhsOrLhsDest) {
  FloatRegister rhsDest = rhsOrRhsDest;
  FloatRegister lhs = lhsOrLhsDest;
  vmaxpd(Operand(lhs), rhsDest, rhsDest);
}

void MacroAssembler::pseudoMaxFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                        FloatRegister dest) {
  vmaxpd(Operand(rhs), lhs, dest);
}

// Widening/pairwise integer dot product

void MacroAssembler::widenDotInt16x8(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister dest) {
  vpmaddwd(Operand(rhs), lhs, dest);
}

void MacroAssembler::widenDotInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                     FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpmaddwd,
                &MacroAssembler::vpmaddwdSimd128);
}

void MacroAssembler::dotInt8x16Int7x16(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (lhs == dest && !HasAVX()) {
    moveSimd128Int(lhs, scratch);
    lhs = scratch;
  }
  rhs = moveSimd128IntIfNotAVX(rhs, dest);
  vpmaddubsw(lhs, rhs, dest);
}

void MacroAssembler::dotInt8x16Int7x16ThenAdd(FloatRegister lhs,
                                              FloatRegister rhs,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  rhs = moveSimd128IntIfNotAVX(rhs, scratch);
  vpmaddubsw(lhs, rhs, scratch);
  vpmaddwdSimd128(SimdConstant::SplatX8(1), scratch, scratch);
  vpaddd(Operand(scratch), dest, dest);
}

// Rounding

void MacroAssembler::ceilFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Ceil, Operand(src), dest);
}

void MacroAssembler::ceilFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Ceil, Operand(src), dest);
}

void MacroAssembler::floorFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Floor, Operand(src), dest);
}

void MacroAssembler::floorFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Floor, Operand(src), dest);
}

void MacroAssembler::truncFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Trunc, Operand(src), dest);
}

void MacroAssembler::truncFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Trunc, Operand(src), dest);
}

void MacroAssembler::nearestFloat32x4(FloatRegister src, FloatRegister dest) {
  vroundps(Assembler::SSERoundingMode::Nearest, Operand(src), dest);
}

void MacroAssembler::nearestFloat64x2(FloatRegister src, FloatRegister dest) {
  vroundpd(Assembler::SSERoundingMode::Nearest, Operand(src), dest);
}

// Floating add

void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddps(Operand(rhs), lhs, dest);
}

void MacroAssembler::addFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddps,
                &MacroAssembler::vaddpsSimd128);
}

void MacroAssembler::addFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::addFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vaddpd,
                &MacroAssembler::vaddpdSimd128);
}

// Floating subtract

void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vsubps(Operand(rhs), lhs, dest);
}

void MacroAssembler::subFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubps,
                &MacroAssembler::vsubpsSimd128);
}

void MacroAssembler::subFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  AssemblerX86Shared::vsubpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::subFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vsubpd,
                &MacroAssembler::vsubpdSimd128);
}

// Floating division

void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivps(Operand(rhs), lhs, dest);
}

void MacroAssembler::divFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivps,
                &MacroAssembler::vdivpsSimd128);
}

void MacroAssembler::divFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::divFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vdivpd,
                &MacroAssembler::vdivpdSimd128);
}

// Floating Multiply

void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulps(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulFloat32x4(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulps,
                &MacroAssembler::vmulpsSimd128);
}

void MacroAssembler::mulFloat64x2(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::mulFloat64x2(FloatRegister lhs, const SimdConstant& rhs,
                                  FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vmulpd,
                &MacroAssembler::vmulpdSimd128);
}

// Pairwise add

void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,
                                           FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  if (dest == src) {
    moveSimd128(src, scratch);
    src = scratch;
  }
  loadConstantSimd128Int(SimdConstant::SplatX16(1), dest);
  vpmaddubsw(src, dest, dest);
}

void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
                                                   FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmaddubswSimd128(SimdConstant::SplatX16(1), src, dest);
}

void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
                                           FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpmaddwdSimd128(SimdConstant::SplatX8(1), src, dest);
}

void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
                                                   FloatRegister dest) {
  src = moveSimd128IntIfNotAVX(src, dest);
  vpxorSimd128(SimdConstant::SplatX8(-0x8000), src, dest);
  vpmaddwdSimd128(SimdConstant::SplatX8(1), dest, dest);
  vpadddSimd128(SimdConstant::SplatX4(0x00010000), dest, dest);
}

// Floating square root

void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
  vsqrtps(Operand(src), dest);
}

void MacroAssembler::sqrtFloat64x2(FloatRegister src, FloatRegister dest) {
  vsqrtpd(Operand(src), dest);
}

// Integer to floating point with rounding

void MacroAssembler::convertInt32x4ToFloat32x4(FloatRegister src,
                                               FloatRegister dest) {
  vcvtdq2ps(src, dest);
}

void MacroAssembler::unsignedConvertInt32x4ToFloat32x4(FloatRegister src,
                                                       FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat32x4(src, dest);
}

void MacroAssembler::convertInt32x4ToFloat64x2(FloatRegister src,
                                               FloatRegister dest) {
  vcvtdq2pd(src, dest);
}

void MacroAssembler::unsignedConvertInt32x4ToFloat64x2(FloatRegister src,
                                                       FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat64x2(src, dest);
}

// Floating point to integer with saturation

void MacroAssembler::truncSatFloat32x4ToInt32x4(FloatRegister src,
                                                FloatRegister dest) {
  MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(src, dest);
}

void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src,
                                                        FloatRegister dest,
                                                        FloatRegister temp) {
  MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(src, temp, dest);
}

void MacroAssembler::truncSatFloat64x2ToInt32x4(FloatRegister src,
                                                FloatRegister dest,
                                                FloatRegister temp) {
  MacroAssemblerX86Shared::truncSatFloat64x2ToInt32x4(src, temp, dest);
}

void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
                                                        FloatRegister dest,
                                                        FloatRegister temp) {
  MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(src, temp, dest);
}

void MacroAssembler::truncFloat32x4ToInt32x4Relaxed(FloatRegister src,
                                                    FloatRegister dest) {
  vcvttps2dq(src, dest);
}

void MacroAssembler::unsignedTruncFloat32x4ToInt32x4Relaxed(
    FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedTruncFloat32x4ToInt32x4Relaxed(src, dest);
}

void MacroAssembler::truncFloat64x2ToInt32x4Relaxed(FloatRegister src,
                                                    FloatRegister dest) {
  vcvttpd2dq(src, dest);
}

void MacroAssembler::unsignedTruncFloat64x2ToInt32x4Relaxed(
    FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::unsignedTruncFloat64x2ToInt32x4Relaxed(src, dest);
}

// Floating point widening

void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,
                                                 FloatRegister dest) {
  vcvtpd2ps(src, dest);
}

void MacroAssembler::convertFloat32x4ToFloat64x2(FloatRegister src,
                                                 FloatRegister dest) {
  vcvtps2pd(src, dest);
}

// Integer to integer narrowing

void MacroAssembler::narrowInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpacksswb(Operand(rhs), lhs, dest);
}

void MacroAssembler::narrowInt16x8(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpacksswb,
                &MacroAssembler::vpacksswbSimd128);
}

void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpackuswb(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedNarrowInt16x8(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackuswb,
                &MacroAssembler::vpackuswbSimd128);
}

void MacroAssembler::narrowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) {
  vpackssdw(Operand(rhs), lhs, dest);
}

void MacroAssembler::narrowInt32x4(FloatRegister lhs, const SimdConstant& rhs,
                                   FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackssdw,
                &MacroAssembler::vpackssdwSimd128);
}

void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs, FloatRegister rhs,
                                           FloatRegister dest) {
  vpackusdw(Operand(rhs), lhs, dest);
}

void MacroAssembler::unsignedNarrowInt32x4(FloatRegister lhs,
                                           const SimdConstant& rhs,
                                           FloatRegister dest) {
  binarySimd128(lhs, rhs, dest, &MacroAssembler::vpackusdw,
                &MacroAssembler::vpackusdwSimd128);
}

// Integer to integer widening

void MacroAssembler::widenLowInt8x16(FloatRegister src, FloatRegister dest) {
  vpmovsxbw(Operand(src), dest);
}

void MacroAssembler::widenHighInt8x16(FloatRegister src, FloatRegister dest) {
  vpalignr(Operand(src), dest, dest, 8);
  vpmovsxbw(Operand(dest), dest);
}

void MacroAssembler::unsignedWidenLowInt8x16(FloatRegister src,
                                             FloatRegister dest) {
  vpmovzxbw(Operand(src), dest);
}

void MacroAssembler::unsignedWidenHighInt8x16(FloatRegister src,
                                              FloatRegister dest) {
  vpalignr(Operand(src), dest, dest, 8);
  vpmovzxbw(Operand(dest), dest);
}

void MacroAssembler::widenLowInt16x8(FloatRegister src, FloatRegister dest) {
  vpmovsxwd(Operand(src), dest);
}

void MacroAssembler::widenHighInt16x8(FloatRegister src, FloatRegister dest) {
  vpalignr(Operand(src), dest, dest, 8);
  vpmovsxwd(Operand(dest), dest);
}

void MacroAssembler::unsignedWidenLowInt16x8(FloatRegister src,
                                             FloatRegister dest) {
  vpmovzxwd(Operand(src), dest);
}

void MacroAssembler::unsignedWidenHighInt16x8(FloatRegister src,
                                              FloatRegister dest) {
  vpalignr(Operand(src), dest, dest, 8);
  vpmovzxwd(Operand(dest), dest);
}

void MacroAssembler::widenLowInt32x4(FloatRegister src, FloatRegister dest) {
  vpmovsxdq(Operand(src), dest);
}

void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src,
                                             FloatRegister dest) {
  vpmovzxdq(Operand(src), dest);
}

void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {
  if (src == dest || HasAVX()) {
    vmovhlps(src, src, dest);
  } else {
    vpshufd(ComputeShuffleMask(2, 3, 2, 3), src, dest);
  }
  vpmovsxdq(Operand(dest), dest);
}

void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src,
                                              FloatRegister dest) {
  ScratchSimd128Scope scratch(*this);
  src = moveSimd128IntIfNotAVX(src, dest);
  vpxor(scratch, scratch, scratch);
  vpunpckhdq(scratch, src, dest);
}

// Floating multiply-accumulate: srcDest [+-]= src1 * src2
// The Intel FMA feature is some AVX* special sauce, no support yet.

void MacroAssembler::fmaFloat32x4(FloatRegister src1, FloatRegister src2,
                                  FloatRegister srcDest) {
  if (HasFMA()) {
    vfmadd231ps(src2, src1, srcDest);
    return;
  }
  ScratchSimd128Scope scratch(*this);
  src1 = moveSimd128FloatIfNotAVX(src1, scratch);
  mulFloat32x4(src1, src2, scratch);
  addFloat32x4(srcDest, scratch, srcDest);
}

void MacroAssembler::fnmaFloat32x4(FloatRegister src1, FloatRegister src2,
                                   FloatRegister srcDest) {
  if (HasFMA()) {
    vfnmadd231ps(src2, src1, srcDest);
    return;
  }
  ScratchSimd128Scope scratch(*this);
  src1 = moveSimd128FloatIfNotAVX(src1, scratch);
  mulFloat32x4(src1, src2, scratch);
  subFloat32x4(srcDest, scratch, srcDest);
}

void MacroAssembler::fmaFloat64x2(FloatRegister src1, FloatRegister src2,
                                  FloatRegister srcDest) {
  if (HasFMA()) {
    vfmadd231pd(src2, src1, srcDest);
    return;
  }
  ScratchSimd128Scope scratch(*this);
  src1 = moveSimd128FloatIfNotAVX(src1, scratch);
  mulFloat64x2(src1, src2, scratch);
  addFloat64x2(srcDest, scratch, srcDest);
}

void MacroAssembler::fnmaFloat64x2(FloatRegister src1, FloatRegister src2,
                                   FloatRegister srcDest) {
  if (HasFMA()) {
    vfnmadd231pd(src2, src1, srcDest);
    return;
  }
  ScratchSimd128Scope scratch(*this);
  src1 = moveSimd128FloatIfNotAVX(src1, scratch);
  mulFloat64x2(src1, src2, scratch);
  subFloat64x2(srcDest, scratch, srcDest);
}

void MacroAssembler::minFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  vminps(Operand(src), srcDest, srcDest);
}

void MacroAssembler::minFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  vminps(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxFloat32x4Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  vmaxps(Operand(src), srcDest, srcDest);
}

void MacroAssembler::maxFloat32x4Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  vmaxps(Operand(rhs), lhs, dest);
}

void MacroAssembler::minFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  vminpd(Operand(src), srcDest, srcDest);
}

void MacroAssembler::minFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  vminpd(Operand(rhs), lhs, dest);
}

void MacroAssembler::maxFloat64x2Relaxed(FloatRegister src,
                                         FloatRegister srcDest) {
  vmaxpd(Operand(src), srcDest, srcDest);
}

void MacroAssembler::maxFloat64x2Relaxed(FloatRegister lhs, FloatRegister rhs,
                                         FloatRegister dest) {
  vmaxpd(Operand(rhs), lhs, dest);
}

// ========================================================================
// Truncate floating point.

void MacroAssembler::truncateFloat32ToInt64(Address src, Address dest,
                                            Register temp) {
  if (Assembler::HasSSE3()) {
    fld32(Operand(src));
    fisttp(Operand(dest));
    return;
  }

  if (src.base == esp) {
    src.offset += 2 * sizeof(int32_t);
  }
  if (dest.base == esp) {
    dest.offset += 2 * sizeof(int32_t);
  }

  reserveStack(2 * sizeof(int32_t));

  // Set conversion to truncation.
  fnstcw(Operand(esp, 0));
  load32(Operand(esp, 0), temp);
  andl(Imm32(~0xFF00), temp);
  orl(Imm32(0xCFF), temp);
  store32(temp, Address(esp, sizeof(int32_t)));
  fldcw(Operand(esp, sizeof(int32_t)));

  // Load double on fp stack, convert and load regular stack.
  fld32(Operand(src));
  fistp(Operand(dest));

  // Reset the conversion flag.
  fldcw(Operand(esp, 0));

  freeStack(2 * sizeof(int32_t));
}
void MacroAssembler::truncateDoubleToInt64(Address src, Address dest,
                                           Register temp) {
  if (Assembler::HasSSE3()) {
    fld(Operand(src));
    fisttp(Operand(dest));
    return;
  }

  if (src.base == esp) {
    src.offset += 2 * sizeof(int32_t);
  }
  if (dest.base == esp) {
    dest.offset += 2 * sizeof(int32_t);
  }

  reserveStack(2 * sizeof(int32_t));

  // Set conversion to truncation.
  fnstcw(Operand(esp, 0));
  load32(Operand(esp, 0), temp);
  andl(Imm32(~0xFF00), temp);
  orl(Imm32(0xCFF), temp);
  store32(temp, Address(esp, 1 * sizeof(int32_t)));
  fldcw(Operand(esp, 1 * sizeof(int32_t)));

  // Load double on fp stack, convert and load regular stack.
  fld(Operand(src));
  fistp(Operand(dest));

  // Reset the conversion flag.
  fldcw(Operand(esp, 0));

  freeStack(2 * sizeof(int32_t));
}

// ===============================================================
// Clamping functions.

void MacroAssembler::clampIntToUint8(Register reg) {
  Label inRange;
  branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange);
  {
    sarl(Imm32(31), reg);
    notl(reg);
    andl(Imm32(255), reg);
  }
  bind(&inRange);
}

//}}} check_macroassembler_style
// ===============================================================

}  // namespace jit
}  // namespace js

#endif /* jit_x86_shared_MacroAssembler_x86_shared_inl_h */

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.41 Sekunden (vorverarbeitet am 2026-04-26) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.