Quelle Lowering-x86-shared.cpp Sprache: C

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/Lowering.h"
#include "jit/MIR-wasm.h"
#include "jit/MIR.h"
#include "wasm/WasmFeatures.h"  // for wasm::ReportSimdAnalysis

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Maybe;
using mozilla::Nothing;
using mozilla::Some;

LTableSwitch* LIRGeneratorX86Shared::newLTableSwitch(
    const LAllocation& in, const LDefinition& inputCopy) {
  return new (alloc()) LTableSwitch(in, inputCopy, temp());
}

LTableSwitchV* LIRGeneratorX86Shared::newLTableSwitchV(
    const LBoxAllocation& in) {
  return new (alloc()) LTableSwitchV(in, temp(), tempDouble(), temp());
}

void LIRGenerator::visitPowHalf(MPowHalf* ins) {
  MDefinition* input = ins->input();
  MOZ_ASSERT(input->type() == MIRType::Double);
  LPowHalfD* lir = new (alloc()) LPowHalfD(useRegisterAtStart(input));
  define(lir, ins);
}

LUse LIRGeneratorX86Shared::useShiftRegister(MDefinition* mir) {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return useRegister(mir);
  }
  return useFixed(mir, ecx);
}

LUse LIRGeneratorX86Shared::useShiftRegisterAtStart(MDefinition* mir) {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return useRegisterAtStart(mir);
  }
  return useFixedAtStart(mir, ecx);
}

LDefinition LIRGeneratorX86Shared::tempShift() {
  // Unless BMI2 is available, the shift register must be ecx. x86 can't shift a
  // non-ecx register.
  if (Assembler::HasBMI2()) {
    return temp();
  }
  return tempFixed(ecx);
}

void LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins,
                                          MDefinition* mir, MDefinition* lhs,
                                          MDefinition* rhs) {
  ins->setOperand(0, useRegisterAtStart(lhs));

  if (rhs->isConstant()) {
    ins->setOperand(1, useOrConstantAtStart(rhs));
  } else if (!mir->isRotate()) {
    ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useShiftRegister(rhs)
                           : useShiftRegisterAtStart(rhs));
  } else {
    ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useFixed(rhs, ecx)
                           : useFixedAtStart(rhs, ecx));
  }

  defineReuseInput(ins, mir, 0);
}

template <class LInstr>
void LIRGeneratorX86Shared::lowerForShiftInt64(LInstr* ins, MDefinition* mir,
                                               MDefinition* lhs,
                                               MDefinition* rhs) {
  LAllocation rhsAlloc;
  if (rhs->isConstant()) {
    rhsAlloc = useOrConstantAtStart(rhs);
#ifdef JS_CODEGEN_X64
  } else if (std::is_same_v<LInstr, LShiftI64>) {
    rhsAlloc = useShiftRegister(rhs);
#endif
  } else {
    // The operands are int64, but we only care about the lower 32 bits of
    // the RHS. On 32-bit, the code below will load that part in ecx and
    // will discard the upper half.
    ensureDefined(rhs);
    LUse use(ecx);
    use.setVirtualRegister(rhs->virtualRegister());
    rhsAlloc = use;
  }

  if constexpr (std::is_same_v<LInstr, LShiftI64>) {
    ins->setLhs(useInt64RegisterAtStart(lhs));
    ins->setRhs(rhsAlloc);
    defineInt64ReuseInput(ins, mir, LShiftI64::LhsIndex);
  } else {
    ins->setInput(useInt64RegisterAtStart(lhs));
    ins->setCount(rhsAlloc);
#if defined(JS_NUNBOX32)
    ins->setTemp0(temp());
#endif
    defineInt64ReuseInput(ins, mir, LRotateI64::InputIndex);
  }
}

template void LIRGeneratorX86Shared::lowerForShiftInt64(LShiftI64* ins,
                                                        MDefinition* mir,
                                                        MDefinition* lhs,
                                                        MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForShiftInt64(LRotateI64* ins,
                                                        MDefinition* mir,
                                                        MDefinition* lhs,
                                                        MDefinition* rhs);

void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins,
                                        MDefinition* mir, MDefinition* input) {
  ins->setOperand(0, useRegisterAtStart(input));
  defineReuseInput(ins, mir, 0);
}

void LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins,
                                        MDefinition* mir, MDefinition* lhs,
                                        MDefinition* rhs) {
  ins->setOperand(0, useRegisterAtStart(lhs));
  ins->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                         ? useOrConstant(rhs)
                         : useOrConstantAtStart(rhs));
  defineReuseInput(ins, mir, 0);
}

template <size_t Temps>
void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins,
                                        MDefinition* mir, MDefinition* lhs,
                                        MDefinition* rhs) {
  // Without AVX, we'll need to use the x86 encodings where one of the
  // inputs must be the same location as the output.
  if (!Assembler::HasAVX()) {
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(
        1, willHaveDifferentLIRNodes(lhs, rhs) ? use(rhs) : useAtStart(rhs));
    defineReuseInput(ins, mir, 0);
  } else {
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, useAtStart(rhs));
    define(ins, mir);
  }
}

template void LIRGeneratorX86Shared::lowerForFPU(
    LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
    MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(
    LInstructionHelper<1, 2, 1>* ins, MDefinition* mir, MDefinition* lhs,
    MDefinition* rhs);

void LIRGeneratorX86Shared::lowerNegI(MInstruction* ins, MDefinition* input) {
  defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(input)), ins, 0);
}

void LIRGeneratorX86Shared::lowerNegI64(MInstruction* ins, MDefinition* input) {
  defineInt64ReuseInput(new (alloc()) LNegI64(useInt64RegisterAtStart(input)),
                        ins, 0);
}

void LIRGenerator::visitAbs(MAbs* ins) {
  defineReuseInput(allocateAbs(ins, useRegisterAtStart(ins->input())), ins, 0);
}

void LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs,
                                      MDefinition* rhs) {
  // Note: If we need a negative zero check, lhs is used twice.
  LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
  LMulI* lir = new (alloc())
      LMulI(useRegisterAtStart(lhs),
            willHaveDifferentLIRNodes(lhs, rhs) ? useOrConstant(rhs)
                                                : useOrConstantAtStart(rhs),
            lhsCopy);
  if (mul->fallible()) {
    assignSnapshot(lir, mul->bailoutKind());
  }
  defineReuseInput(lir, mul, 0);
}

void LIRGeneratorX86Shared::lowerDivI(MDiv* div) {
  if (div->isUnsigned()) {
    lowerUDiv(div);
    return;
  }

  // Division instructions are slow. Division by constant denominators can be
  // rewritten to use other instructions.
  if (div->rhs()->isConstant()) {
    int32_t rhs = div->rhs()->toConstant()->toInt32();

    // Division by powers of two can be done by shifting, and division by
    // other numbers can be done by a reciprocal multiplication technique.
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      LAllocation lhs = useRegisterAtStart(div->lhs());
      LDivPowTwoI* lir;
      // When truncated with maybe a non-zero remainder, we have to round the
      // result toward 0. This requires an extra register to round up/down
      // whether the left-hand-side is signed.
      bool needRoundNeg = div->canBeNegativeDividend() && div->isTruncated();
      if (!needRoundNeg) {
        // Numerator is unsigned, so does not need adjusting.
        lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
      } else {
        // Numerator might be signed, and needs adjusting, and an extra lhs copy
        // is needed to round the result of the integer division towards zero.
        lir = new (alloc())
            LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
      }
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
      return;
    }
    if (rhs != 0) {
      LDivOrModConstantI* lir;
      lir = new (alloc())
          LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineFixed(lir, div, LAllocation(AnyRegister(edx)));
      return;
    }
  }

  LDivI* lir = new (alloc())
      LDivI(useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void LIRGeneratorX86Shared::lowerModI(MMod* mod) {
  if (mod->isUnsigned()) {
    lowerUMod(mod);
    return;
  }

  if (mod->rhs()->isConstant()) {
    int32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(Abs(rhs));
    if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
      LModPowTwoI* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
      return;
    }
    if (rhs != 0) {
      LDivOrModConstantI* lir;
      lir = new (alloc())
          LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
      return;
    }
  }

  LModI* lir = new (alloc())
      LModI(useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {
  switch (ins->type()) {
    case MIRType::Int32:
      defineReuseInput(new (alloc()) LNegI(useRegisterAtStart(ins->input())),
                       ins, 0);
      break;
    case MIRType::Float32:
      defineReuseInput(new (alloc()) LNegF(useRegisterAtStart(ins->input())),
                       ins, 0);
      break;
    case MIRType::Double:
      defineReuseInput(new (alloc()) LNegD(useRegisterAtStart(ins->input())),
                       ins, 0);
      break;
    default:
      MOZ_CRASH();
  }
}

void LIRGeneratorX86Shared::lowerWasmSelectI(MWasmSelect* select) {
  auto* lir = new (alloc())
      LWasmSelect(useRegisterAtStart(select->trueExpr()),
                  useAny(select->falseExpr()), useRegister(select->condExpr()));
  defineReuseInput(lir, select, LWasmSelect::TrueExprIndex);
}

void LIRGeneratorX86Shared::lowerWasmSelectI64(MWasmSelect* select) {
  auto* lir = new (alloc()) LWasmSelectI64(
      useInt64RegisterAtStart(select->trueExpr()),
      useInt64(select->falseExpr()), useRegister(select->condExpr()));
  defineInt64ReuseInput(lir, select, LWasmSelectI64::TrueExprIndex);
}

void LIRGenerator::visitAsmJSLoadHeap(MAsmJSLoadHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based when accessed via constant.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  auto* lir =
      new (alloc()) LAsmJSLoadHeap(baseAlloc, limitAlloc, memoryBaseAlloc);
  define(lir, ins);
}

void LIRGenerator::visitAsmJSStoreHeap(MAsmJSStoreHeap* ins) {
  MDefinition* base = ins->base();
  MOZ_ASSERT(base->type() == MIRType::Int32);

  MDefinition* boundsCheckLimit = ins->boundsCheckLimit();
  MOZ_ASSERT_IF(ins->needsBoundsCheck(),
                boundsCheckLimit->type() == MIRType::Int32);

  // For simplicity, require a register if we're going to emit a bounds-check
  // branch, so that we don't have special cases for constants. This should
  // only happen in rare constant-folding cases since asm.js sets the minimum
  // heap size based when accessed via constant.
  LAllocation baseAlloc = ins->needsBoundsCheck()
                              ? useRegisterAtStart(base)
                              : useRegisterOrZeroAtStart(base);

  LAllocation limitAlloc = ins->needsBoundsCheck()
                               ? useRegisterAtStart(boundsCheckLimit)
                               : LAllocation();
  LAllocation memoryBaseAlloc = ins->hasMemoryBase()
                                    ? useRegisterAtStart(ins->memoryBase())
                                    : LAllocation();

  LAsmJSStoreHeap* lir = nullptr;
  switch (ins->access().type()) {
    case Scalar::Int8:
    case Scalar::Uint8:
#ifdef JS_CODEGEN_X86
      // See comment for LIRGeneratorX86::useByteOpRegister.
      lir = new (alloc()) LAsmJSStoreHeap(
          baseAlloc, useFixed(ins->value(), eax), limitAlloc, memoryBaseAlloc);
      break;
#endif
    case Scalar::Int16:
    case Scalar::Uint16:
    case Scalar::Int32:
    case Scalar::Uint32:
    case Scalar::Float32:
    case Scalar::Float64:
      // For now, don't allow constant values. The immediate operand affects
      // instruction layout which affects patching.
      lir = new (alloc())
          LAsmJSStoreHeap(baseAlloc, useRegisterAtStart(ins->value()),
                          limitAlloc, memoryBaseAlloc);
      break;
    case Scalar::Int64:
    case Scalar::Simd128:
      MOZ_CRASH("NYI");
    case Scalar::Uint8Clamped:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::Float16:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }
  add(lir, ins);
}

void LIRGeneratorX86Shared::lowerUDiv(MDiv* div) {
  if (div->rhs()->isConstant()) {
    // NOTE: the result of toInt32 is coerced to uint32_t.
    uint32_t rhs = div->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    LAllocation lhs = useRegisterAtStart(div->lhs());
    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, lhs, shift, false);
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineReuseInput(lir, div, 0);
    } else {
      LUDivOrModConstant* lir = new (alloc())
          LUDivOrModConstant(useRegister(div->lhs()), rhs, tempFixed(eax));
      if (div->fallible()) {
        assignSnapshot(lir, div->bailoutKind());
      }
      defineFixed(lir, div, LAllocation(AnyRegister(edx)));
    }
    return;
  }

  LUDivOrMod* lir = new (alloc()) LUDivOrMod(
      useRegister(div->lhs()), useRegister(div->rhs()), tempFixed(edx));
  if (div->fallible()) {
    assignSnapshot(lir, div->bailoutKind());
  }
  defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void LIRGeneratorX86Shared::lowerUMod(MMod* mod) {
  if (mod->rhs()->isConstant()) {
    uint32_t rhs = mod->rhs()->toConstant()->toInt32();
    int32_t shift = FloorLog2(rhs);

    if (rhs != 0 && uint32_t(1) << shift == rhs) {
      LModPowTwoI* lir =
          new (alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineReuseInput(lir, mod, 0);
    } else {
      LUDivOrModConstant* lir = new (alloc())
          LUDivOrModConstant(useRegister(mod->lhs()), rhs, tempFixed(edx));
      if (mod->fallible()) {
        assignSnapshot(lir, mod->bailoutKind());
      }
      defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
    }
    return;
  }

  LUDivOrMod* lir = new (alloc()) LUDivOrMod(
      useRegister(mod->lhs()), useRegister(mod->rhs()), tempFixed(eax));
  if (mod->fallible()) {
    assignSnapshot(lir, mod->bailoutKind());
  }
  defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir) {
  MDefinition* lhs = mir->lhs();
  MDefinition* rhs = mir->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Int32);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(mir->type() == MIRType::Double);

#ifdef JS_CODEGEN_X64
  static_assert(ecx == rcx);
#endif

  LUse lhsUse = useRegisterAtStart(lhs);
  LAllocation rhsAlloc;
  if (rhs->isConstant()) {
    rhsAlloc = useOrConstant(rhs);
  } else {
    rhsAlloc = useShiftRegister(rhs);
  }

  LUrshD* lir = new (alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
  define(lir, mir);
}

void LIRGeneratorX86Shared::lowerPowOfTwoI(MPow* mir) {
  int32_t base = mir->input()->toConstant()->toInt32();
  MDefinition* power = mir->power();

  auto* lir = new (alloc()) LPowOfTwoI(useShiftRegister(power), base);
  assignSnapshot(lir, mir->bailoutKind());
  define(lir, mir);
}

void LIRGeneratorX86Shared::lowerBigIntPtrLsh(MBigIntPtrLsh* ins) {
  auto* lir = new (alloc()) LBigIntPtrLsh(
      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
  assignSnapshot(lir, ins->bailoutKind());
  define(lir, ins);
}

void LIRGeneratorX86Shared::lowerBigIntPtrRsh(MBigIntPtrRsh* ins) {
  auto* lir = new (alloc()) LBigIntPtrRsh(
      useRegister(ins->lhs()), useRegister(ins->rhs()), temp(), tempShift());
  assignSnapshot(lir, ins->bailoutKind());
  define(lir, ins);
}

void LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(
    MCompareExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // If the target is a floating register then we need a temp at the
  // lower level; that temp must be eax.
  //
  // Otherwise the target (if used) is an integer register, which
  // must be eax.  If the target is not used the machine code will
  // still clobber eax, so just pretend it's used.
  //
  // oldval must be in a register.
  //
  // newval must be in a register.  If the source is a byte array
  // then newval must be a register that has a byte size: on x86
  // this must be ebx, ecx, or edx (eax is taken for the output).
  //
  // Bug #1077036 describes some further optimization opportunities.

  bool fixedOutput = false;
  LDefinition tempDef = LDefinition::BogusTemp();
  LAllocation newval;
  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    tempDef = tempFixed(eax);
    newval = useRegister(ins->newval());
  } else {
    fixedOutput = true;
    if (useI386ByteRegisters && ins->isByteArray()) {
      newval = useFixed(ins->newval(), ebx);
    } else {
      newval = useRegister(ins->newval());
    }
  }

  const LAllocation oldval = useRegister(ins->oldval());

  LCompareExchangeTypedArrayElement* lir =
      new (alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval,
                                                      newval, tempDef);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}

void LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(
    MAtomicExchangeTypedArrayElement* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());
  const LAllocation value = useRegister(ins->value());

  // The underlying instruction is XCHG, which can operate on any
  // register.
  //
  // If the target is a floating register (for Uint32) then we need
  // a temp into which to exchange.
  //
  // If the source is a byte array then we need a register that has
  // a byte size; in this case -- on x86 only -- pin the output to
  // an appropriate register and use that as a temp in the back-end.

  LDefinition tempDef = LDefinition::BogusTemp();
  if (ins->arrayType() == Scalar::Uint32) {
    MOZ_ASSERT(ins->type() == MIRType::Double);
    tempDef = temp();
  }

  LAtomicExchangeTypedArrayElement* lir = new (alloc())
      LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

  if (useI386ByteRegisters && ins->isByteArray()) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else {
    define(lir, ins);
  }
}

void LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(
    MAtomicTypedArrayElementBinop* ins, bool useI386ByteRegisters) {
  MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
  MOZ_ASSERT(!Scalar::isFloatingType(ins->arrayType()));
  MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
  MOZ_ASSERT(ins->index()->type() == MIRType::IntPtr);

  const LUse elements = useRegister(ins->elements());
  const LAllocation index =
      useRegisterOrIndexConstant(ins->index(), ins->arrayType());

  // Case 1: the result of the operation is not used.
  //
  // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
  // LOCK OR, or LOCK XOR.  We can do this even for the Uint32 case.

  if (ins->isForEffect()) {
    LAllocation value;
    if (useI386ByteRegisters && ins->isByteArray() &&
        !ins->value()->isConstant()) {
      value = useFixed(ins->value(), ebx);
    } else {
      value = useRegisterOrConstant(ins->value());
    }

    LAtomicTypedArrayElementBinopForEffect* lir = new (alloc())
        LAtomicTypedArrayElementBinopForEffect(elements, index, value);

    add(lir, ins);
    return;
  }

  // Case 2: the result of the operation is used.
  //
  // For ADD and SUB we'll use XADD:
  //
  //    movl       src, output
  //    lock xaddl output, mem
  //
  // For the 8-bit variants XADD needs a byte register for the output.
  //
  // For AND/OR/XOR we need to use a CMPXCHG loop:
  //
  //    movl          *mem, eax
  // L: mov           eax, temp
  //    andl          src, temp
  //    lock cmpxchg  temp, mem  ; reads eax also
  //    jnz           L
  //    ; result in eax
  //
  // Note the placement of L, cmpxchg will update eax with *mem if
  // *mem does not have the expected value, so reloading it at the
  // top of the loop would be redundant.
  //
  // If the array is not a uint32 array then:
  //  - eax should be the output (one result of the cmpxchg)
  //  - there is a temp, which must have a byte register if
  //    the array has 1-byte elements elements
  //
  // If the array is a uint32 array then:
  //  - eax is the first temp
  //  - we also need a second temp
  //
  // There are optimization opportunities:
  //  - better register allocation in the x86 8-bit case, Bug #1077036.

  bool bitOp =
      !(ins->operation() == AtomicOp::Add || ins->operation() == AtomicOp::Sub);
  bool fixedOutput = true;
  bool reuseInput = false;
  LDefinition tempDef1 = LDefinition::BogusTemp();
  LDefinition tempDef2 = LDefinition::BogusTemp();
  LAllocation value;

  if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
    value = useRegisterOrConstant(ins->value());
    fixedOutput = false;
    if (bitOp) {
      tempDef1 = tempFixed(eax);
      tempDef2 = temp();
    } else {
      tempDef1 = temp();
    }
  } else if (useI386ByteRegisters && ins->isByteArray()) {
    if (ins->value()->isConstant()) {
      value = useRegisterOrConstant(ins->value());
    } else {
      value = useFixed(ins->value(), ebx);
    }
    if (bitOp) {
      tempDef1 = tempFixed(ecx);
    }
  } else if (bitOp) {
    value = useRegisterOrConstant(ins->value());
    tempDef1 = temp();
  } else if (ins->value()->isConstant()) {
    fixedOutput = false;
    value = useRegisterOrConstant(ins->value());
  } else {
    fixedOutput = false;
    reuseInput = true;
    value = useRegisterAtStart(ins->value());
  }

  LAtomicTypedArrayElementBinop* lir = new (alloc())
      LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

  if (fixedOutput) {
    defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
  } else if (reuseInput) {
    defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::ValueIndex);
  } else {
    define(lir, ins);
  }
}

void LIRGenerator::visitCopySign(MCopySign* ins) {
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(IsFloatingPointType(lhs->type()));
  MOZ_ASSERT(lhs->type() == rhs->type());
  MOZ_ASSERT(lhs->type() == ins->type());

  LInstructionHelper<1, 2, 2>* lir;
  if (lhs->type() == MIRType::Double) {
    lir = new (alloc()) LCopySignD();
  } else {
    lir = new (alloc()) LCopySignF();
  }

  // As lowerForFPU, but we want rhs to be in a FP register too.
  lir->setOperand(0, useRegisterAtStart(lhs));
  if (!Assembler::HasAVX()) {
    lir->setOperand(1, willHaveDifferentLIRNodes(lhs, rhs)
                           ? useRegister(rhs)
                           : useRegisterAtStart(rhs));
    defineReuseInput(lir, ins, 0);
  } else {
    lir->setOperand(1, useRegisterAtStart(rhs));
    define(lir, ins);
  }
}

// These lowerings are really x86-shared but some Masm APIs are not yet
// available on x86.

// Ternary and binary operators require the dest register to be the same as
// their first input register, leading to a pattern of useRegisterAtStart +
// defineReuseInput.

void LIRGenerator::visitWasmTernarySimd128(MWasmTernarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->v0()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->v1()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->v2()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  switch (ins->simdOp()) {
    case wasm::SimdOp::V128Bitselect: {
      // Enforcing lhs == output avoids one setup move.  We would like to also
      // enforce merging the control with the temp (with
      // usRegisterAtStart(control) and tempCopy()), but the register allocator
      // ignores those constraints at present.
      auto* lir = new (alloc()) LWasmTernarySimd128(
          useRegisterAtStart(ins->v0()), useRegister(ins->v1()),
          useRegister(ins->v2()), tempSimd128(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V0Index);
      break;
    }
    case wasm::SimdOp::F32x4RelaxedMadd:
    case wasm::SimdOp::F32x4RelaxedNmadd:
    case wasm::SimdOp::F64x2RelaxedMadd:
    case wasm::SimdOp::F64x2RelaxedNmadd: {
      auto* lir = new (alloc())
          LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                              useRegisterAtStart(ins->v2()),
                              LDefinition::BogusTemp(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
      break;
    }
    case wasm::SimdOp::I32x4DotI8x16I7x16AddS: {
      auto* lir = new (alloc())
          LWasmTernarySimd128(useRegister(ins->v0()), useRegister(ins->v1()),
                              useRegisterAtStart(ins->v2()),
                              LDefinition::BogusTemp(), ins->simdOp());
      defineReuseInput(lir, ins, LWasmTernarySimd128::V2Index);
      break;
    }
    case wasm::SimdOp::I8x16RelaxedLaneSelect:
    case wasm::SimdOp::I16x8RelaxedLaneSelect:
    case wasm::SimdOp::I32x4RelaxedLaneSelect:
    case wasm::SimdOp::I64x2RelaxedLaneSelect: {
      if (Assembler::HasAVX()) {
        auto* lir = new (alloc()) LWasmTernarySimd128(
            useRegisterAtStart(ins->v0()), useRegisterAtStart(ins->v1()),
            useRegisterAtStart(ins->v2()), LDefinition::BogusTemp(),
            ins->simdOp());
        define(lir, ins);
      } else {
        auto* lir = new (alloc()) LWasmTernarySimd128(
            useRegister(ins->v0()), useRegisterAtStart(ins->v1()),
            useFixed(ins->v2(), vmm0), LDefinition::BogusTemp(), ins->simdOp());
        defineReuseInput(lir, ins, LWasmTernarySimd128::V1Index);
      }
      break;
    }
    default:
      MOZ_CRASH("NYI");
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmBinarySimd128(MWasmBinarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();
  wasm::SimdOp op = ins->simdOp();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Note MWasmBinarySimd128::foldsTo has already specialized operations that
  // have a constant operand, so this takes care of more general cases of
  // reordering, see ReorderCommutative.
  if (ins->isCommutative()) {
    ReorderCommutative(&lhs, &rhs, ins);
  }

  // Swap operands and change operation if necessary, these are all x86/x64
  // dependent transformations.  Except where noted, this is about avoiding
  // unnecessary moves and fixups in the code generator macros.
  bool swap = false;
  switch (op) {
    case wasm::SimdOp::V128AndNot: {
      // Code generation requires the operands to be reversed.
      swap = true;
      break;
    }
    case wasm::SimdOp::I8x16LtS: {
      swap = true;
      op = wasm::SimdOp::I8x16GtS;
      break;
    }
    case wasm::SimdOp::I8x16GeS: {
      swap = true;
      op = wasm::SimdOp::I8x16LeS;
      break;
    }
    case wasm::SimdOp::I16x8LtS: {
      swap = true;
      op = wasm::SimdOp::I16x8GtS;
      break;
    }
    case wasm::SimdOp::I16x8GeS: {
      swap = true;
      op = wasm::SimdOp::I16x8LeS;
      break;
    }
    case wasm::SimdOp::I32x4LtS: {
      swap = true;
      op = wasm::SimdOp::I32x4GtS;
      break;
    }
    case wasm::SimdOp::I32x4GeS: {
      swap = true;
      op = wasm::SimdOp::I32x4LeS;
      break;
    }
    case wasm::SimdOp::F32x4Gt: {
      swap = true;
      op = wasm::SimdOp::F32x4Lt;
      break;
    }
    case wasm::SimdOp::F32x4Ge: {
      swap = true;
      op = wasm::SimdOp::F32x4Le;
      break;
    }
    case wasm::SimdOp::F64x2Gt: {
      swap = true;
      op = wasm::SimdOp::F64x2Lt;
      break;
    }
    case wasm::SimdOp::F64x2Ge: {
      swap = true;
      op = wasm::SimdOp::F64x2Le;
      break;
    }
    case wasm::SimdOp::F32x4PMin:
    case wasm::SimdOp::F32x4PMax:
    case wasm::SimdOp::F64x2PMin:
    case wasm::SimdOp::F64x2PMax: {
      // Code generation requires the operations to be reversed (the rhs is the
      // output register).
      swap = true;
      break;
    }
    default:
      break;
  }
  if (swap) {
    MDefinition* tmp = lhs;
    lhs = rhs;
    rhs = tmp;
  }

  // Allocate temp registers
  LDefinition tempReg0 = LDefinition::BogusTemp();
  LDefinition tempReg1 = LDefinition::BogusTemp();
  switch (op) {
    case wasm::SimdOp::I64x2Mul:
      tempReg0 = tempSimd128();
      break;
    case wasm::SimdOp::F32x4Min:
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Min:
    case wasm::SimdOp::F64x2Max:
      tempReg0 = tempSimd128();
      tempReg1 = tempSimd128();
      break;
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::I64x2GeS:
      // The compareForOrderingInt64x2AVX implementation does not require
      // temps but needs SSE4.2 support. Checking if both AVX and SSE4.2
      // are enabled.
      if (!(Assembler::HasAVX() && Assembler::HasSSE42())) {
        tempReg0 = tempSimd128();
        tempReg1 = tempSimd128();
      }
      break;
    default:
      break;
  }

  // For binary ops, without AVX support, the Masm API always is usually
  // (rhs, lhsDest) and requires  AtStart+ReuseInput for the lhs.
  //
  // For a few ops, the API is actually (rhsDest, lhs) and the rules are the
  // same but the reversed.  We swapped operands above; they will be swapped
  // again in the code generator to emit the right code.
  //
  // If AVX support is enabled, some binary ops can use output as destination,
  // useRegisterAtStart is applied for both operands and no need for ReuseInput.

  switch (op) {
    case wasm::SimdOp::I8x16AvgrU:
    case wasm::SimdOp::I16x8AvgrU:
    case wasm::SimdOp::I8x16Add:
    case wasm::SimdOp::I8x16AddSatS:
    case wasm::SimdOp::I8x16AddSatU:
    case wasm::SimdOp::I8x16Sub:
    case wasm::SimdOp::I8x16SubSatS:
    case wasm::SimdOp::I8x16SubSatU:
    case wasm::SimdOp::I16x8Mul:
    case wasm::SimdOp::I16x8MinS:
    case wasm::SimdOp::I16x8MinU:
    case wasm::SimdOp::I16x8MaxS:
    case wasm::SimdOp::I16x8MaxU:
    case wasm::SimdOp::I32x4Add:
    case wasm::SimdOp::I32x4Sub:
    case wasm::SimdOp::I32x4Mul:
    case wasm::SimdOp::I32x4MinS:
    case wasm::SimdOp::I32x4MinU:
    case wasm::SimdOp::I32x4MaxS:
    case wasm::SimdOp::I32x4MaxU:
    case wasm::SimdOp::I64x2Add:
    case wasm::SimdOp::I64x2Sub:
    case wasm::SimdOp::I64x2Mul:
    case wasm::SimdOp::F32x4Add:
    case wasm::SimdOp::F32x4Sub:
    case wasm::SimdOp::F32x4Mul:
    case wasm::SimdOp::F32x4Div:
    case wasm::SimdOp::F64x2Add:
    case wasm::SimdOp::F64x2Sub:
    case wasm::SimdOp::F64x2Mul:
    case wasm::SimdOp::F64x2Div:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
    case wasm::SimdOp::F32x4PMin:
    case wasm::SimdOp::F32x4PMax:
    case wasm::SimdOp::F64x2PMin:
    case wasm::SimdOp::F64x2PMax:
    case wasm::SimdOp::I8x16Swizzle:
    case wasm::SimdOp::I8x16RelaxedSwizzle:
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I32x4GeU:
    case wasm::SimdOp::I64x2Eq:
    case wasm::SimdOp::I64x2Ne:
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::I64x2GeS:
    case wasm::SimdOp::V128And:
    case wasm::SimdOp::V128Or:
    case wasm::SimdOp::V128Xor:
    case wasm::SimdOp::V128AndNot:
    case wasm::SimdOp::F32x4Min:
    case wasm::SimdOp::F32x4Max:
    case wasm::SimdOp::F64x2Min:
    case wasm::SimdOp::F64x2Max:
    case wasm::SimdOp::I8x16NarrowI16x8S:
    case wasm::SimdOp::I8x16NarrowI16x8U:
    case wasm::SimdOp::I16x8NarrowI32x4S:
    case wasm::SimdOp::I16x8NarrowI32x4U:
    case wasm::SimdOp::I32x4DotI16x8S:
    case wasm::SimdOp::I16x8ExtmulLowI8x16S:
    case wasm::SimdOp::I16x8ExtmulHighI8x16S:
    case wasm::SimdOp::I16x8ExtmulLowI8x16U:
    case wasm::SimdOp::I16x8ExtmulHighI8x16U:
    case wasm::SimdOp::I32x4ExtmulLowI16x8S:
    case wasm::SimdOp::I32x4ExtmulHighI16x8S:
    case wasm::SimdOp::I32x4ExtmulLowI16x8U:
    case wasm::SimdOp::I32x4ExtmulHighI16x8U:
    case wasm::SimdOp::I64x2ExtmulLowI32x4S:
    case wasm::SimdOp::I64x2ExtmulHighI32x4S:
    case wasm::SimdOp::I64x2ExtmulLowI32x4U:
    case wasm::SimdOp::I64x2ExtmulHighI32x4U:
    case wasm::SimdOp::I16x8Q15MulrSatS:
    case wasm::SimdOp::F32x4RelaxedMin:
    case wasm::SimdOp::F32x4RelaxedMax:
    case wasm::SimdOp::F64x2RelaxedMin:
    case wasm::SimdOp::F64x2RelaxedMax:
    case wasm::SimdOp::I16x8RelaxedQ15MulrS:
    case wasm::SimdOp::I16x8DotI8x16I7x16S:
    case wasm::SimdOp::MozPMADDUBSW:
      if (isThreeOpAllowed()) {
        auto* lir = new (alloc())
            LWasmBinarySimd128(useRegisterAtStart(lhs), useRegisterAtStart(rhs),
                               tempReg0, tempReg1, op);
        define(lir, ins);
        break;
      }
      [[fallthrough]];
    default: {
      LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
      LAllocation rhsAlloc = willHaveDifferentLIRNodes(lhs, rhs)
                                 ? useRegister(rhs)
                                 : useRegisterAtStart(rhs);
      auto* lir = new (alloc())
          LWasmBinarySimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1, op);
      defineReuseInput(lir, ins, LWasmBinarySimd128::LhsIndex);
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

#ifdef ENABLE_WASM_SIMD
bool MWasmTernarySimd128::specializeBitselectConstantMaskAsShuffle(
    int8_t shuffle[16]) {
  if (simdOp() != wasm::SimdOp::V128Bitselect) {
    return false;
  }

  // Optimization when control vector is a mask with all 0 or all 1 per lane.
  // On x86, there is no bitselect, blend operations will be a win,
  // e.g. via PBLENDVB or PBLENDW.
  SimdConstant constant = static_cast<MWasmFloatConstant*>(v2())->toSimd128();
  const SimdConstant::I8x16& bytes = constant.asInt8x16();
  for (int8_t i = 0; i < 16; i++) {
    if (bytes[i] == -1) {
      shuffle[i] = i + 16;
    } else if (bytes[i] == 0) {
      shuffle[i] = i;
    } else {
      return false;
    }
  }
  return true;
}
bool MWasmTernarySimd128::canRelaxBitselect() {
  wasm::SimdOp simdOp;
  if (v2()->isWasmBinarySimd128()) {
    simdOp = v2()->toWasmBinarySimd128()->simdOp();
  } else if (v2()->isWasmBinarySimd128WithConstant()) {
    simdOp = v2()->toWasmBinarySimd128WithConstant()->simdOp();
  } else {
    return false;
  }
  switch (simdOp) {
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16GeS:
    case wasm::SimdOp::I8x16LtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I8x16GtU:
    case wasm::SimdOp::I8x16GeU:
    case wasm::SimdOp::I8x16LtU:
    case wasm::SimdOp::I8x16LeU:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8GeS:
    case wasm::SimdOp::I16x8LtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I16x8GtU:
    case wasm::SimdOp::I16x8GeU:
    case wasm::SimdOp::I16x8LtU:
    case wasm::SimdOp::I16x8LeU:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4GeS:
    case wasm::SimdOp::I32x4LtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I32x4GtU:
    case wasm::SimdOp::I32x4GeU:
    case wasm::SimdOp::I32x4LtU:
    case wasm::SimdOp::I32x4LeU:
    case wasm::SimdOp::I64x2Eq:
    case wasm::SimdOp::I64x2Ne:
    case wasm::SimdOp::I64x2GtS:
    case wasm::SimdOp::I64x2GeS:
    case wasm::SimdOp::I64x2LtS:
    case wasm::SimdOp::I64x2LeS:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Gt:
    case wasm::SimdOp::F32x4Ge:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Gt:
    case wasm::SimdOp::F64x2Ge:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
      return true;
    default:
      break;
  }
  return false;
}

bool MWasmBinarySimd128::canPmaddubsw() {
  MOZ_ASSERT(Assembler::HasSSE3());
  return true;
}
#endif

bool MWasmBinarySimd128::specializeForConstantRhs() {
  // The order follows MacroAssembler.h, generally
  switch (simdOp()) {
    // Operations implemented by a single native instruction where it is
    // plausible that the rhs (after commutation if available) could be a
    // constant.
    //
    // Swizzle is not here because it was handled earlier in the pipeline.
    //
    // Integer compares >= and < are not here because they are not supported in
    // the hardware.
    //
    // Floating compares are not here because our patching machinery can't
    // handle them yet.
    //
    // Floating-point min and max (including pmin and pmax) are not here because
    // they are not straightforward to implement.
    case wasm::SimdOp::I8x16Add:
    case wasm::SimdOp::I16x8Add:
    case wasm::SimdOp::I32x4Add:
    case wasm::SimdOp::I64x2Add:
    case wasm::SimdOp::I8x16Sub:
    case wasm::SimdOp::I16x8Sub:
    case wasm::SimdOp::I32x4Sub:
    case wasm::SimdOp::I64x2Sub:
    case wasm::SimdOp::I16x8Mul:
    case wasm::SimdOp::I32x4Mul:
    case wasm::SimdOp::I8x16AddSatS:
    case wasm::SimdOp::I8x16AddSatU:
    case wasm::SimdOp::I16x8AddSatS:
    case wasm::SimdOp::I16x8AddSatU:
    case wasm::SimdOp::I8x16SubSatS:
    case wasm::SimdOp::I8x16SubSatU:
    case wasm::SimdOp::I16x8SubSatS:
    case wasm::SimdOp::I16x8SubSatU:
    case wasm::SimdOp::I8x16MinS:
    case wasm::SimdOp::I8x16MinU:
    case wasm::SimdOp::I16x8MinS:
    case wasm::SimdOp::I16x8MinU:
    case wasm::SimdOp::I32x4MinS:
    case wasm::SimdOp::I32x4MinU:
    case wasm::SimdOp::I8x16MaxS:
    case wasm::SimdOp::I8x16MaxU:
    case wasm::SimdOp::I16x8MaxS:
    case wasm::SimdOp::I16x8MaxU:
    case wasm::SimdOp::I32x4MaxS:
    case wasm::SimdOp::I32x4MaxU:
    case wasm::SimdOp::V128And:
    case wasm::SimdOp::V128Or:
    case wasm::SimdOp::V128Xor:
    case wasm::SimdOp::I8x16Eq:
    case wasm::SimdOp::I8x16Ne:
    case wasm::SimdOp::I8x16GtS:
    case wasm::SimdOp::I8x16LeS:
    case wasm::SimdOp::I16x8Eq:
    case wasm::SimdOp::I16x8Ne:
    case wasm::SimdOp::I16x8GtS:
    case wasm::SimdOp::I16x8LeS:
    case wasm::SimdOp::I32x4Eq:
    case wasm::SimdOp::I32x4Ne:
    case wasm::SimdOp::I32x4GtS:
    case wasm::SimdOp::I32x4LeS:
    case wasm::SimdOp::I64x2Mul:
    case wasm::SimdOp::F32x4Eq:
    case wasm::SimdOp::F32x4Ne:
    case wasm::SimdOp::F32x4Lt:
    case wasm::SimdOp::F32x4Le:
    case wasm::SimdOp::F64x2Eq:
    case wasm::SimdOp::F64x2Ne:
    case wasm::SimdOp::F64x2Lt:
    case wasm::SimdOp::F64x2Le:
    case wasm::SimdOp::I32x4DotI16x8S:
    case wasm::SimdOp::F32x4Add:
    case wasm::SimdOp::F64x2Add:
    case wasm::SimdOp::F32x4Sub:
    case wasm::SimdOp::F64x2Sub:
    case wasm::SimdOp::F32x4Div:
    case wasm::SimdOp::F64x2Div:
    case wasm::SimdOp::F32x4Mul:
    case wasm::SimdOp::F64x2Mul:
    case wasm::SimdOp::I8x16NarrowI16x8S:
    case wasm::SimdOp::I8x16NarrowI16x8U:
    case wasm::SimdOp::I16x8NarrowI32x4S:
    case wasm::SimdOp::I16x8NarrowI32x4U:
      return true;
    default:
      return false;
  }
}

void LIRGenerator::visitWasmBinarySimd128WithConstant(
    MWasmBinarySimd128WithConstant* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // Allocate temp registers
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I64x2Mul:
      tempReg = tempSimd128();
      break;
    default:
      break;
  }

  if (isThreeOpAllowed()) {
    // The non-destructive versions of instructions will be available
    // when AVX is enabled.
    LAllocation lhsAlloc = useRegisterAtStart(lhs);
    auto* lir = new (alloc())
        LWasmBinarySimd128WithConstant(lhsAlloc, tempReg, ins->rhs());
    define(lir, ins);
  } else {
    // Always beneficial to reuse the lhs register here, see discussion in
    // visitWasmBinarySimd128() and also code in specializeForConstantRhs().
    LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
    auto* lir = new (alloc())
        LWasmBinarySimd128WithConstant(lhsDestAlloc, tempReg, ins->rhs());
    defineReuseInput(lir, ins, LWasmBinarySimd128WithConstant::LhsIndex);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MDefinition* lhs = ins->lhs();
  MDefinition* rhs = ins->rhs();

  MOZ_ASSERT(lhs->type() == MIRType::Simd128);
  MOZ_ASSERT(rhs->type() == MIRType::Int32);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  if (rhs->isConstant()) {
    int32_t shiftCountMask;
    switch (ins->simdOp()) {
      case wasm::SimdOp::I8x16Shl:
      case wasm::SimdOp::I8x16ShrU:
      case wasm::SimdOp::I8x16ShrS:
        shiftCountMask = 7;
        break;
      case wasm::SimdOp::I16x8Shl:
      case wasm::SimdOp::I16x8ShrU:
      case wasm::SimdOp::I16x8ShrS:
        shiftCountMask = 15;
        break;
      case wasm::SimdOp::I32x4Shl:
      case wasm::SimdOp::I32x4ShrU:
      case wasm::SimdOp::I32x4ShrS:
        shiftCountMask = 31;
        break;
      case wasm::SimdOp::I64x2Shl:
      case wasm::SimdOp::I64x2ShrU:
      case wasm::SimdOp::I64x2ShrS:
        shiftCountMask = 63;
        break;
      default:
        MOZ_CRASH("Unexpected shift operation");
    }

    int32_t shiftCount = rhs->toConstant()->toInt32() & shiftCountMask;
    if (shiftCount == shiftCountMask) {
      // Check if possible to apply sign replication optimization.
      // For some ops the input shall be reused.
      switch (ins->simdOp()) {
        case wasm::SimdOp::I8x16ShrS: {
          auto* lir =
              new (alloc()) LWasmSignReplicationSimd128(useRegister(lhs));
          define(lir, ins);
          return;
        }
        case wasm::SimdOp::I16x8ShrS:
        case wasm::SimdOp::I32x4ShrS:
        case wasm::SimdOp::I64x2ShrS: {
          auto* lir = new (alloc())
              LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
          if (isThreeOpAllowed()) {
            define(lir, ins);
          } else {
            // For non-AVX, it is always beneficial to reuse the input.
            defineReuseInput(lir, ins, LWasmSignReplicationSimd128::SrcIndex);
          }
          return;
        }
        default:
          break;
      }
    }

#  ifdef DEBUG
    js::wasm::ReportSimdAnalysis("shift -> constant shift");
#  endif
    auto* lir = new (alloc())
        LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
    if (isThreeOpAllowed()) {
      define(lir, ins);
    } else {
      // For non-AVX, it is always beneficial to reuse the input.
      defineReuseInput(lir, ins, LWasmConstantShiftSimd128::SrcIndex);
    }
    return;
  }

#  ifdef DEBUG
  js::wasm::ReportSimdAnalysis("shift -> variable shift");
#  endif

  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrS:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I64x2ShrS:
      tempReg = tempSimd128();
      break;
    default:
      break;
  }

  // Reusing the input if possible is never detrimental.
  LAllocation lhsDestAlloc = useRegisterAtStart(lhs);
  LAllocation rhsAlloc = useRegisterAtStart(rhs);
  auto* lir =
      new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg);
  defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsIndex);
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmShuffleSimd128(MWasmShuffleSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->rhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  SimdShuffle s = ins->shuffle();
  switch (s.opd) {
    case SimdShuffle::Operand::LEFT:
    case SimdShuffle::Operand::RIGHT: {
      LAllocation src;
      bool reuse = false;
      switch (*s.permuteOp) {
        case SimdPermuteOp::MOVE:
          reuse = true;
          break;
        case SimdPermuteOp::BROADCAST_8x16:
        case SimdPermuteOp::BROADCAST_16x8:
        case SimdPermuteOp::PERMUTE_8x16:
        case SimdPermuteOp::PERMUTE_16x8:
        case SimdPermuteOp::PERMUTE_32x4:
        case SimdPermuteOp::ROTATE_RIGHT_8x16:
        case SimdPermuteOp::SHIFT_LEFT_8x16:
        case SimdPermuteOp::SHIFT_RIGHT_8x16:
        case SimdPermuteOp::REVERSE_16x8:
        case SimdPermuteOp::REVERSE_32x4:
        case SimdPermuteOp::REVERSE_64x2:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
        case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
          // No need to reuse registers when VEX instructions are enabled.
          reuse = !Assembler::HasAVX();
          break;
        default:
          MOZ_CRASH("Unexpected operator");
      }
      if (s.opd == SimdShuffle::Operand::LEFT) {
        src = useRegisterAtStart(ins->lhs());
      } else {
        src = useRegisterAtStart(ins->rhs());
      }
      auto* lir =
          new (alloc()) LWasmPermuteSimd128(src, *s.permuteOp, s.control);
      if (reuse) {
        defineReuseInput(lir, ins, LWasmPermuteSimd128::SrcIndex);
      } else {
        define(lir, ins);
      }
      break;
    }
    case SimdShuffle::Operand::BOTH:
    case SimdShuffle::Operand::BOTH_SWAPPED: {
      LDefinition temp = LDefinition::BogusTemp();
      switch (*s.shuffleOp) {
        case SimdShuffleOp::BLEND_8x16:
          temp = Assembler::HasAVX() ? tempSimd128() : tempFixed(xmm0);
          break;
        default:
          break;
      }
      if (isThreeOpAllowed()) {
        LAllocation lhs;
        LAllocation rhs;
        if (s.opd == SimdShuffle::Operand::BOTH) {
          lhs = useRegisterAtStart(ins->lhs());
          rhs = useRegisterAtStart(ins->rhs());
        } else {
          lhs = useRegisterAtStart(ins->rhs());
          rhs = useRegisterAtStart(ins->lhs());
        }
        auto* lir = new (alloc())
            LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
        define(lir, ins);
      } else {
        LAllocation lhs;
        LAllocation rhs;
        if (s.opd == SimdShuffle::Operand::BOTH) {
          lhs = useRegisterAtStart(ins->lhs());
          rhs = useRegister(ins->rhs());
        } else {
          lhs = useRegisterAtStart(ins->rhs());
          rhs = useRegister(ins->lhs());
        }
        auto* lir = new (alloc())
            LWasmShuffleSimd128(lhs, rhs, temp, *s.shuffleOp, s.control);
        defineReuseInput(lir, ins, LWasmShuffleSimd128::LhsIndex);
      }
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmReplaceLaneSimd128(MWasmReplaceLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->lhs()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  // If AVX support is disabled, the Masm API is (rhs, lhsDest) and requires
  // AtStart+ReuseInput for the lhs. For type reasons, the rhs will never be
  // the same as the lhs and is therefore a plain Use.
  //
  // If AVX support is enabled, useRegisterAtStart is preferred.

  if (ins->rhs()->type() == MIRType::Int64) {
    if (isThreeOpAllowed()) {
      auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
          useRegisterAtStart(ins->lhs()), useInt64RegisterAtStart(ins->rhs()));
      define(lir, ins);
    } else {
      auto* lir = new (alloc()) LWasmReplaceInt64LaneSimd128(
          useRegisterAtStart(ins->lhs()), useInt64Register(ins->rhs()));
      defineReuseInput(lir, ins, LWasmReplaceInt64LaneSimd128::LhsIndex);
    }
  } else {
    if (isThreeOpAllowed()) {
      auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
          useRegisterAtStart(ins->lhs()), useRegisterAtStart(ins->rhs()));
      define(lir, ins);
    } else {
      auto* lir = new (alloc()) LWasmReplaceLaneSimd128(
          useRegisterAtStart(ins->lhs()), useRegister(ins->rhs()));
      defineReuseInput(lir, ins, LWasmReplaceLaneSimd128::LhsIndex);
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmScalarToSimd128(MWasmScalarToSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  switch (ins->input()->type()) {
    case MIRType::Int64: {
      // 64-bit integer splats.
      // Load-and-(sign|zero)extend.
      auto* lir = new (alloc())
          LWasmInt64ToSimd128(useInt64RegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
    case MIRType::Float32:
    case MIRType::Double: {
      // Floating-point splats.
      // Ideally we save a move on SSE systems by reusing the input register,
      // but since the input and output register types differ, we can't.
      auto* lir =
          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
    default: {
      // 32-bit integer splats.
      auto* lir =
          new (alloc()) LWasmScalarToSimd128(useRegisterAtStart(ins->input()));
      define(lir, ins);
      break;
    }
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  MOZ_ASSERT(ins->input()->type() == MIRType::Simd128);
  MOZ_ASSERT(ins->type() == MIRType::Simd128);

  bool useAtStart = false;
  bool reuseInput = false;
  LDefinition tempReg = LDefinition::BogusTemp();
  switch (ins->simdOp()) {
    case wasm::SimdOp::I8x16Neg:
    case wasm::SimdOp::I16x8Neg:
    case wasm::SimdOp::I32x4Neg:
    case wasm::SimdOp::I64x2Neg:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
      // Prefer src != dest to avoid an unconditional src->temp move.
      MOZ_ASSERT(!reuseInput);
      // If AVX is enabled, we prefer useRegisterAtStart.
      useAtStart = isThreeOpAllowed();
      break;
    case wasm::SimdOp::F32x4Neg:
    case wasm::SimdOp::F64x2Neg:
    case wasm::SimdOp::F32x4Abs:
    case wasm::SimdOp::F64x2Abs:
    case wasm::SimdOp::V128Not:
    case wasm::SimdOp::F32x4Sqrt:
    case wasm::SimdOp::F64x2Sqrt:
    case wasm::SimdOp::I8x16Abs:
    case wasm::SimdOp::I16x8Abs:
    case wasm::SimdOp::I32x4Abs:
    case wasm::SimdOp::I64x2Abs:
    case wasm::SimdOp::I32x4TruncSatF32x4S:
    case wasm::SimdOp::F32x4ConvertI32x4U:
    case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
    case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4S:
    case wasm::SimdOp::I32x4RelaxedTruncF32x4U:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2SZero:
    case wasm::SimdOp::I32x4RelaxedTruncF64x2UZero:
    case wasm::SimdOp::I64x2ExtendHighI32x4S:
    case wasm::SimdOp::I64x2ExtendHighI32x4U:
      // Prefer src == dest to avoid an unconditional src->dest move
      // for better performance in non-AVX mode (e.g. non-PSHUFD use).
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I32x4TruncSatF32x4U:
    case wasm::SimdOp::I32x4TruncSatF64x2SZero:
    case wasm::SimdOp::I32x4TruncSatF64x2UZero:
    case wasm::SimdOp::I8x16Popcnt:
      tempReg = tempSimd128();
      // Prefer src == dest to avoid an unconditional src->dest move
      // in non-AVX mode.
      useAtStart = true;
      reuseInput = !isThreeOpAllowed();
      break;
    case wasm::SimdOp::I16x8ExtendLowI8x16S:
    case wasm::SimdOp::I16x8ExtendHighI8x16S:
    case wasm::SimdOp::I16x8ExtendLowI8x16U:
    case wasm::SimdOp::I16x8ExtendHighI8x16U:
    case wasm::SimdOp::I32x4ExtendLowI16x8S:
    case wasm::SimdOp::I32x4ExtendHighI16x8S:
    case wasm::SimdOp::I32x4ExtendLowI16x8U:
    case wasm::SimdOp::I32x4ExtendHighI16x8U:
    case wasm::SimdOp::I64x2ExtendLowI32x4S:
    case wasm::SimdOp::I64x2ExtendLowI32x4U:
    case wasm::SimdOp::F32x4ConvertI32x4S:
    case wasm::SimdOp::F32x4Ceil:
    case wasm::SimdOp::F32x4Floor:
    case wasm::SimdOp::F32x4Trunc:
    case wasm::SimdOp::F32x4Nearest:
    case wasm::SimdOp::F64x2Ceil:
    case wasm::SimdOp::F64x2Floor:
    case wasm::SimdOp::F64x2Trunc:
    case wasm::SimdOp::F64x2Nearest:
    case wasm::SimdOp::F32x4DemoteF64x2Zero:
    case wasm::SimdOp::F64x2PromoteLowF32x4:
    case wasm::SimdOp::F64x2ConvertLowI32x4S:
    case wasm::SimdOp::F64x2ConvertLowI32x4U:
      // Prefer src == dest to exert the lowest register pressure on the
      // surrounding code.
      useAtStart = true;
      MOZ_ASSERT(!reuseInput);
      break;
    default:
      MOZ_CRASH("Unary SimdOp not implemented");
  }

  LUse inputUse =
      useAtStart ? useRegisterAtStart(ins->input()) : useRegister(ins->input());
  LWasmUnarySimd128* lir = new (alloc()) LWasmUnarySimd128(inputUse, tempReg);
  if (reuseInput) {
    defineReuseInput(lir, ins, LWasmUnarySimd128::SrcIndex);
  } else {
    define(lir, ins);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmLoadLaneSimd128(MWasmLoadLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // A trick: On 32-bit systems, the base pointer is 32 bits (it was bounds
  // checked and then chopped).  On 64-bit systems, it can be 32 bits or 64
  // bits.  Either way, it fits in a GPR so we can ignore the
  // Register/Register64 distinction here.
#  ifndef JS_64BIT
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
#  endif
  LUse base = useRegisterAtStart(ins->base());
  LUse inputUse = useRegisterAtStart(ins->value());
  LAllocation memoryBase = ins->hasMemoryBase()
                               ? useRegisterAtStart(ins->memoryBase())
                               : LAllocation();
  auto* lir = new (alloc()) LWasmLoadLaneSimd128(base, inputUse, memoryBase);
  defineReuseInput(lir, ins, LWasmLoadLaneSimd128::SrcIndex);
#else
  MOZ_CRASH("No SIMD");
#endif
}

void LIRGenerator::visitWasmStoreLaneSimd128(MWasmStoreLaneSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  // See comment above.
#  ifndef JS_64BIT
  MOZ_ASSERT(ins->base()->type() == MIRType::Int32);
#  endif
  LUse base = useRegisterAtStart(ins->base());
  LUse input = useRegisterAtStart(ins->value());
  LAllocation memoryBase = ins->hasMemoryBase()
                               ? useRegisterAtStart(ins->memoryBase())
                               : LAllocation();
  auto* lir = new (alloc()) LWasmStoreLaneSimd128(base, input, memoryBase);
  add(lir, ins);
#else
  MOZ_CRASH("No SIMD");
#endif
}

#ifdef ENABLE_WASM_SIMD

bool LIRGeneratorX86Shared::canFoldReduceSimd128AndBranch(wasm::SimdOp op) {
  switch (op) {
    case wasm::SimdOp::V128AnyTrue:
    case wasm::SimdOp::I8x16AllTrue:
    case wasm::SimdOp::I16x8AllTrue:
    case wasm::SimdOp::I32x4AllTrue:
    case wasm::SimdOp::I64x2AllTrue:
    case wasm::SimdOp::I16x8Bitmask:
      return true;
    default:
      return false;
  }
}

bool LIRGeneratorX86Shared::canEmitWasmReduceSimd128AtUses(
    MWasmReduceSimd128* ins) {
  if (!ins->canEmitAtUses()) {
    return false;
  }
  // Only specific ops generating int32.
  if (ins->type() != MIRType::Int32) {
    return false;
  }
  if (!canFoldReduceSimd128AndBranch(ins->simdOp())) {
    return false;
  }
  // If never used then defer (it will be removed).
  MUseIterator iter(ins->usesBegin());
  if (iter == ins->usesEnd()) {
    return true;
  }
  // We require an MTest consumer.
  MNode* node = iter->consumer();
  if (!node->isDefinition() || !node->toDefinition()->isTest()) {
    return false;
  }
  // Defer only if there's only one use.
  iter++;
  return iter == ins->usesEnd();
}

#endif  // ENABLE_WASM_SIMD

void LIRGenerator::visitWasmReduceSimd128(MWasmReduceSimd128* ins) {
#ifdef ENABLE_WASM_SIMD
  if (canEmitWasmReduceSimd128AtUses(ins)) {
    emitAtUses(ins);
    return;
  }

  // Reductions (any_true, all_true, bitmask, extract_lane) uniformly prefer
  // useRegisterAtStart:
  //
  // - In most cases, the input type differs from the output type, so there's no
  //   conflict and it doesn't really matter.
  //
  // - For extract_lane(0) on F32x4 and F64x2, input == output results in zero
  //   code being generated.
  //
  // - For extract_lane(k > 0) on F32x4 and F64x2, allowing the input register
  //   to be targeted lowers register pressure if it's the last use of the
  //   input.

  if (ins->type() == MIRType::Int64) {
    auto* lir = new (alloc())
        LWasmReduceSimd128ToInt64(useRegisterAtStart(ins->input()));
    defineInt64(lir, ins);
  } else {
    // Ideally we would reuse the input register for floating extract_lane if
    // the lane is zero, but constraints in the register allocator require the
    // input and output register types to be the same.
    auto* lir =
        new (alloc()) LWasmReduceSimd128(useRegisterAtStart(ins->input()));
    define(lir, ins);
  }
#else
  MOZ_CRASH("No SIMD");
#endif
}

Messung V0.5

¤ Dauer der Verarbeitung: 0.21 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.