/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Note: this function clobbers the input register. void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) {
ScratchDoubleScope scratch(*this);
MOZ_ASSERT(input != scratch);
Label positive, done;
// <= 0 or NaN --> 0
zeroDouble(scratch);
branchDouble(DoubleGreaterThan, input, scratch, &positive);
{
move32(Imm32(0), output);
jump(&done);
}
bind(&positive);
if (HasRoundInstruction(RoundingMode::NearestTiesToEven)) { // Round input to nearest integer.
nearbyIntDouble(RoundingMode::NearestTiesToEven, input, input);
// Truncate to int32 and ensure the result <= 255. This relies on the // processor setting output to a value > 255 for doubles outside the int32 // range (for instance 0x80000000).
vcvttsd2si(input, output);
branch32(Assembler::BelowOrEqual, output, Imm32(255), &done);
move32(Imm32(255), output);
} else {
Label outOfRange;
// Truncate to int32 and ensure the result <= 255. This relies on the // processor setting output to a value > 255 for doubles outside the int32 // range (for instance 0x80000000).
vcvttsd2si(input, output);
branch32(Assembler::AboveOrEqual, output, Imm32(255), &outOfRange);
{ // Check if we had a tie.
convertInt32ToDouble(output, scratch);
subDouble(scratch, input);
void MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero) { // Determines whether the low double contained in the XMM register reg // is equal to -0.0.
#ifdefined(JS_CODEGEN_X86)
Label nonZero;
// if not already compared to zero if (maybeNonZero) {
ScratchDoubleScope scratchDouble(asMasm());
// Compare to zero. Lets through {0, -0}.
zeroDouble(scratchDouble);
// If reg is non-zero, jump to nonZero.
asMasm().branchDouble(DoubleNotEqual, reg, scratchDouble, &nonZero);
} // Input register is either zero or negative zero. Retrieve sign of input.
vmovmskpd(reg, scratch);
// If reg is 1 or 3, input is negative zero. // If reg is 0 or 2, input is a normal zero.
asMasm().branchTest32(NonZero, scratch, Imm32(1), label);
// Do a vucomisd to catch equality and NaNs, which both require special // handling. If the operands are ordered and inequal, we branch straight to // the min/max instruction. If we wanted, we could also branch for less-than // or greater-than here instead of using min/max, however these conditions // will sometimes be hard on the branch predictor.
vucomisd(second, first);
j(Assembler::NotEqual, &minMaxInst); if (canBeNaN) {
j(Assembler::Parity, &nan);
}
// Ordered and equal. The operands are bit-identical unless they are zero // and negative zero. These instructions merge the sign bits in that // case, and are no-ops otherwise. if (isMax) {
vandpd(second, first, first);
} else {
vorpd(second, first, first);
}
jump(&done);
// x86's min/max are not symmetric; if either operand is a NaN, they return // the read-only operand. We need to return a NaN if either operand is a // NaN, so we explicitly check for a NaN in the read-write operand. if (canBeNaN) {
bind(&nan);
vucomisd(first, first);
j(Assembler::Parity, &done);
}
// When the values are inequal, or second is NaN, x86's min and max will // return the value we need.
bind(&minMaxInst); if (isMax) {
vmaxsd(second, first, first);
} else {
vminsd(second, first, first);
}
// Do a vucomiss to catch equality and NaNs, which both require special // handling. If the operands are ordered and inequal, we branch straight to // the min/max instruction. If we wanted, we could also branch for less-than // or greater-than here instead of using min/max, however these conditions // will sometimes be hard on the branch predictor.
vucomiss(second, first);
j(Assembler::NotEqual, &minMaxInst); if (canBeNaN) {
j(Assembler::Parity, &nan);
}
// Ordered and equal. The operands are bit-identical unless they are zero // and negative zero. These instructions merge the sign bits in that // case, and are no-ops otherwise. if (isMax) {
vandps(second, first, first);
} else {
vorps(second, first, first);
}
jump(&done);
// x86's min/max are not symmetric; if either operand is a NaN, they return // the read-only operand. We need to return a NaN if either operand is a // NaN, so we explicitly check for a NaN in the read-write operand. if (canBeNaN) {
bind(&nan);
vucomiss(first, first);
j(Assembler::Parity, &done);
}
// When the values are inequal, or second is NaN, x86's min and max will // return the value we need.
bind(&minMaxInst); if (isMax) {
vmaxss(second, first, first);
} else {
vminss(second, first, first);
}
bind(&done);
}
#ifdef ENABLE_WASM_SIMD
// Reports, via *mask, the mask that must be applied to a variable shift
// count for the given SIMD shift operation: the lane width in bits, minus
// one (7/15/31/63). Always returns true on this platform, i.e. the caller
// must always mask the count. Crashes on non-shift ops.
//
// Note: in the original extract the declaration was fused onto the #ifdef
// line (a preprocessor directive consumes its whole line, so the function
// never existed) and `return true;` was fused into one token; both fixed.
bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op,
                                               int32_t* mask) {
  switch (op) {
    case wasm::SimdOp::I8x16Shl:
    case wasm::SimdOp::I8x16ShrU:
    case wasm::SimdOp::I8x16ShrS:
      *mask = 7;
      break;
    case wasm::SimdOp::I16x8Shl:
    case wasm::SimdOp::I16x8ShrU:
    case wasm::SimdOp::I16x8ShrS:
      *mask = 15;
      break;
    case wasm::SimdOp::I32x4Shl:
    case wasm::SimdOp::I32x4ShrU:
    case wasm::SimdOp::I32x4ShrS:
      *mask = 31;
      break;
    case wasm::SimdOp::I64x2Shl:
    case wasm::SimdOp::I64x2ShrU:
    case wasm::SimdOp::I64x2ShrS:
      *mask = 63;
      break;
    default:
      MOZ_CRASH("Unexpected shift operation");
  }
  return true;
}
#endif
// This operation really consists of five phases, in order to enforce the // restriction that on x86_shared, srcDest must be eax and edx will be // clobbered. // // Input: { rhs, lhsOutput } // // [PUSH] Preserve registers // [MOVE] Generate moves to specific registers // // [DIV] Input: { regForRhs, EAX } // [DIV] extend EAX into EDX // [DIV] x86 Division operator // [DIV] Ouptut: { EAX, EDX } // // [MOVE] Move specific registers to outputs // [POP] Restore registers // // Output: { lhsOutput, remainderOutput } void MacroAssembler::flexibleDivMod32(Register rhs, Register lhsOutput, Register remOutput, bool isUnsigned, const LiveRegisterSet&) { // Currently this helper can't handle this situation.
MOZ_ASSERT(lhsOutput != rhs);
MOZ_ASSERT(lhsOutput != remOutput);
// Choose a register that is not edx, or eax to hold the rhs; // ebx is chosen arbitrarily, and will be preserved if necessary. Register regForRhs = (rhs == eax || rhs == edx) ? ebx : rhs;
// Add registers we will be clobbering as live, but // also remove the set we do not restore.
LiveRegisterSet preserve;
preserve.add(edx);
preserve.add(eax);
preserve.add(regForRhs);
// Shuffle input into place.
moveRegPair(lhsOutput, rhs, eax, regForRhs);
// Sign extend eax into edx to make (edx:eax): idiv/udiv are 64-bit. if (isUnsigned) {
mov(ImmWord(0), edx);
udiv(regForRhs);
} else {
cdq();
idiv(regForRhs);
}
moveRegPair(eax, edx, lhsOutput, remOutput);
PopRegsInMask(preserve);
}
void MacroAssembler::flexibleQuotient32( Register rhs, Register srcDest, bool isUnsigned, const LiveRegisterSet& volatileLiveRegs) { // Choose an arbitrary register that isn't eax, edx, rhs or srcDest;
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
regs.takeUnchecked(eax);
regs.takeUnchecked(edx);
regs.takeUnchecked(rhs);
regs.takeUnchecked(srcDest);
// On x86, always use push to push the integer registers, as it's fast // on modern hardware and it's a small instruction. for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
diffG -= sizeof(intptr_t);
Push(*iter);
}
MOZ_ASSERT(diffG == 0);
(void)diffG;
// x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushSizeInBytes.
size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t);
MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4);
MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0);
diffF -= alignExtra;
MOZ_ASSERT(diffF == 0);
// The macroassembler will keep the stack sizeof(uintptr_t)-aligned, so // we don't need to take into account `alignExtra` here.
MOZ_ASSERT(framePushed() - framePushedInitial ==
PushRegsInMaskSizeInBytes(set));
}
// x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushSizeInBytes.
size_t alignExtra = ((size_t)diffF) % sizeof(uintptr_t);
MOZ_ASSERT_IF(sizeof(uintptr_t) == 8, alignExtra == 0 || alignExtra == 4);
MOZ_ASSERT_IF(sizeof(uintptr_t) == 4, alignExtra == 0);
diffF -= alignExtra;
MOZ_ASSERT(diffF == 0);
// What this means is: if `alignExtra` is nonzero, then the save area size // actually used is `alignExtra` bytes smaller than what // PushRegsInMaskSizeInBytes claims. Hence we need to compensate for that.
MOZ_ASSERT(alignExtra + offsetInitial - dest.offset ==
PushRegsInMaskSizeInBytes(set));
}
Address spillAddress(StackPointer, diffF); if (reg.isDouble()) {
loadDouble(spillAddress, reg);
} elseif (reg.isSingle()) {
loadFloat32(spillAddress, reg);
} elseif (reg.isSimd128()) {
loadUnalignedSimd128(spillAddress, reg);
} else {
MOZ_CRASH("Unknown register type.");
}
}
freeStack(reservedF);
MOZ_ASSERT(numFpu == 0);
(void)numFpu; // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with // GetPushBytesInSize.
diffF -= diffF % sizeof(uintptr_t);
MOZ_ASSERT(diffF == 0);
// On x86, use pop to pop the integer registers, if we're not going to // ignore any slots, as it's fast on modern hardware and it's a small // instruction. if (ignore.emptyGeneral()) { for (GeneralRegisterForwardIterator iter(set.gprs()); iter.more(); ++iter) {
diffG -= sizeof(intptr_t);
Pop(*iter);
}
} else { for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();
++iter) {
diffG -= sizeof(intptr_t); if (!ignore.has(*iter)) {
loadPtr(Address(StackPointer, diffG), *iter);
}
}
freeStack(reservedG);
}
MOZ_ASSERT(diffG == 0);
// RAII class that generates the jumps to traps when it's destructed, to // prevent some code duplication in the outOfLineWasmTruncateXtoY methods. struct MOZ_RAII AutoHandleWasmTruncateToIntErrors {
MacroAssembler& masm;
Label inputIsNaN;
Label intOverflow; const wasm::TrapSiteDesc& trapSiteDesc;
~AutoHandleWasmTruncateToIntErrors() { // Handle errors. These cases are not in arbitrary order: code will // fall through to intOverflow.
masm.bind(&intOverflow);
masm.wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc);
if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT32_MAX.
Label nonNegative;
ScratchDoubleScope fpscratch(*this);
loadConstantDouble(0.0, fpscratch);
branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&nonNegative);
move32(Imm32(0), output);
jump(rejoin);
bind(&nonNegative);
move32(Imm32(UINT32_MAX), output);
} else { // Negative overflow is already saturated to INT32_MIN, so we only // have to handle NaN and positive overflow here.
Label notNaN;
branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN);
move32(Imm32(0), output);
jump(rejoin);
// Eagerly take care of NaNs.
branchDouble(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN);
// For unsigned, fall through to intOverflow failure case. if (isUnsigned) { return;
}
// Handle special values.
// We've used vcvttsd2si. The only valid double values that can // truncate to INT32_MIN are in ]INT32_MIN - 1; INT32_MIN].
ScratchDoubleScope fpscratch(*this);
loadConstantDouble(double(INT32_MIN) - 1.0, fpscratch);
branchDouble(Assembler::DoubleLessThanOrEqual, input, fpscratch,
&traps.intOverflow);
if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT32_MAX.
Label nonNegative;
ScratchFloat32Scope fpscratch(*this);
loadConstantFloat32(0.0f, fpscratch);
branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch,
&nonNegative);
move32(Imm32(0), output);
jump(rejoin);
bind(&nonNegative);
move32(Imm32(UINT32_MAX), output);
} else { // Negative overflow is already saturated to INT32_MIN, so we only // have to handle NaN and positive overflow here.
Label notNaN;
branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN);
move32(Imm32(0), output);
jump(rejoin);
// Eagerly take care of NaNs.
branchFloat(Assembler::DoubleUnordered, input, input, &traps.inputIsNaN);
// For unsigned, fall through to intOverflow failure case. if (isUnsigned) { return;
}
// Handle special values.
// We've used vcvttss2si. Check that the input wasn't // float(INT32_MIN), which is the only legimitate input that // would truncate to INT32_MIN.
ScratchFloat32Scope fpscratch(*this);
loadConstantFloat32(float(INT32_MIN), fpscratch);
branchFloat(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow);
jump(rejoin);
}
if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT64_MAX.
Label positive;
ScratchDoubleScope fpscratch(*this);
loadConstantDouble(0.0, fpscratch);
branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, &positive);
move64(Imm64(0), output);
jump(rejoin);
bind(&positive);
move64(Imm64(UINT64_MAX), output);
} else { // Negative overflow is already saturated to INT64_MIN, so we only // have to handle NaN and positive overflow here.
Label notNaN;
branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN);
move64(Imm64(0), output);
jump(rejoin);
// We've used vcvtsd2sq. The only legit value whose i64 // truncation is INT64_MIN is double(INT64_MIN): exponent is so // high that the highest resolution around is much more than 1.
ScratchDoubleScope fpscratch(*this);
loadConstantDouble(double(int64_t(INT64_MIN)), fpscratch);
branchDouble(Assembler::DoubleNotEqual, input, fpscratch, &traps.intOverflow);
jump(rejoin);
}
if (isSaturating) { if (isUnsigned) { // Negative overflow and NaN both are converted to 0, and the only // other case is positive overflow which is converted to // UINT64_MAX.
Label positive;
ScratchFloat32Scope fpscratch(*this);
loadConstantFloat32(0.0f, fpscratch);
branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, &positive);
move64(Imm64(0), output);
jump(rejoin);
bind(&positive);
move64(Imm64(UINT64_MAX), output);
} else { // Negative overflow is already saturated to INT64_MIN, so we only // have to handle NaN and positive overflow here.
Label notNaN;
branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN);
move64(Imm64(0), output);
jump(rejoin);
// Emits `subl $imm8, *address` with a patchable one-byte immediate,
// followed by a branch to `label` taken when the subtraction result is
// negative. Returns the offset to hand later to
// ::patchSub32FromMemAndBranchIfNegative.
//
// Note: in the original extract the `subl` statement had been swallowed by
// the comment sharing its line, leaving `numImmBytes` undefined; restored.
CodeOffset MacroAssembler::sub32FromMemAndBranchIfNegativeWithPatch(
    Address address, Label* label) {
  // -128 is arbitrary, but makes `*address` count upwards, which may help
  // to identify cases where the subsequent ::patch..() call was forgotten.
  int numImmBytes = subl(Imm32(-128), Operand(address));
  // This is vitally important for patching: the immediate must occupy
  // exactly one byte so the patcher can locate and rewrite it.
  MOZ_RELEASE_ASSERT(numImmBytes == 1);
  // Points immediately after the location to patch.
  CodeOffset patchPoint = CodeOffset(currentOffset());
  jSrc(Condition::Signed, label);
  return patchPoint;
}
// Rewrites the one-byte immediate of the `subl` emitted by
// ::sub32FromMemAndBranchIfNegativeWithPatch. `offset` is the patch point
// that helper returned (it points just past the immediate byte).
void MacroAssembler::patchSub32FromMemAndBranchIfNegative(CodeOffset offset,
                                                          Imm32 imm) {
  const int32_t newImm = imm.value;
  // Patching it to zero would make the insn pointless.
  MOZ_RELEASE_ASSERT(newImm >= 1 && newImm <= 127);
  // The immediate byte sits directly before the recorded patch point.
  uint8_t* immByte = (uint8_t*)masm.data() + offset.offset() - 1;
  MOZ_RELEASE_ASSERT(*immByte == uint8_t(-128));  // as created above
  *immByte = uint8_t(newImm) & 0x7F;
}
if (oldval != output) {
masm.movl(oldval, output);
}
if (access) {
masm.append(*access, wasm::TrapMachineInsn::Atomic,
FaultingCodeOffset(masm.currentOffset()));
}
// NOTE: the generated code must match the assembly code in gen_cmpxchg in // GenerateAtomicOperations.py switch (Scalar::byteSize(type)) { case 1:
MOZ_ASSERT(IsByteReg(newval));
masm.lock_cmpxchgb(newval, Operand(mem)); break; case 2:
masm.lock_cmpxchgw(newval, Operand(mem)); break; case 4:
masm.lock_cmpxchgl(newval, Operand(mem)); break; default:
MOZ_CRASH("Invalid");
}
staticauto WasmTrapMachineInsn(Scalar::Type arrayType, AtomicOp op) { switch (op) { case AtomicOp::Add: case AtomicOp::Sub: return wasm::TrapMachineInsn::Atomic; case AtomicOp::And: case AtomicOp::Or: case AtomicOp::Xor: switch (arrayType) { case Scalar::Int8: case Scalar::Uint8: return wasm::TrapMachineInsn::Load8; case Scalar::Int16: case Scalar::Uint16: return wasm::TrapMachineInsn::Load16; case Scalar::Int32: case Scalar::Uint32: return wasm::TrapMachineInsn::Load32; default: break;
}
[[fallthrough]]; default: break;
}
MOZ_CRASH();
}
template <typename T, typename V> staticvoid AtomicFetchOp(MacroAssembler& masm, const wasm::MemoryAccessDesc* access,
Scalar::Type arrayType, AtomicOp op, V value, const T& mem, Register temp, Register output) { // Note value can be an Imm or a Register.
// NOTE: the generated code must match the assembly code in gen_fetchop in // GenerateAtomicOperations.py
// Setup the output register. switch (op) { case AtomicOp::Add: case AtomicOp::Sub:
MOZ_ASSERT(temp == InvalidReg);
MOZ_ASSERT_IF(Scalar::byteSize(arrayType) == 1,
IsByteReg(output) && IsByteReg(value));
SetupValue(masm, op, value, output); break; case AtomicOp::And: case AtomicOp::Or: case AtomicOp::Xor:
MOZ_ASSERT(output != temp && output == eax);
MOZ_ASSERT_IF(Scalar::byteSize(arrayType) == 1,
IsByteReg(output) && IsByteReg(temp));
auto lock_xadd = [&]() { switch (arrayType) { case Scalar::Int8: case Scalar::Uint8:
masm.lock_xaddb(output, Operand(mem)); break; case Scalar::Int16: case Scalar::Uint16:
masm.lock_xaddw(output, Operand(mem)); break; case Scalar::Int32: case Scalar::Uint32:
masm.lock_xaddl(output, Operand(mem)); break; default:
MOZ_CRASH();
}
};
auto load = [&]() { switch (arrayType) { case Scalar::Int8: case Scalar::Uint8:
masm.movzbl(Operand(mem), eax); break; case Scalar::Int16: case Scalar::Uint16:
masm.movzwl(Operand(mem), eax); break; case Scalar::Int32: case Scalar::Uint32:
masm.movl(Operand(mem), eax); break; default:
MOZ_CRASH();
}
};
auto bitwiseOp = [&]() { switch (op) { case AtomicOp::And:
masm.andl(value, temp); break; case AtomicOp::Or:
masm.orl(value, temp); break; case AtomicOp::Xor:
masm.xorl(value, temp); break; default:
MOZ_CRASH();
}
};
auto lock_cmpxchg = [&]() { switch (arrayType) { case Scalar::Int8: case Scalar::Uint8:
masm.lock_cmpxchgb(temp, Operand(mem)); break; case Scalar::Int16: case Scalar::Uint16:
masm.lock_cmpxchgw(temp, Operand(mem)); break; case Scalar::Int32: case Scalar::Uint32:
masm.lock_cmpxchgl(temp, Operand(mem)); break; default:
MOZ_CRASH();
}
};
// Add trap instruction directly before the load. if (access) {
masm.append(*access, WasmTrapMachineInsn(arrayType, op),
FaultingCodeOffset(masm.currentOffset()));
}
switch (op) { case AtomicOp::Add: case AtomicOp::Sub: // `add` and `sub` operations can be optimized with XADD.
lock_xadd();
ExtendTo32(masm, arrayType, output); break;
case AtomicOp::And: case AtomicOp::Or: case AtomicOp::Xor: { // Bitwise operations need a CAS loop.
// Emit a barrier that forces all in-flight speculative execution to
// resolve before later instructions execute.
// Spectre mitigation recommended by Intel and AMD suggest to use lfence as
// a way to force all speculative execution of instructions to end.
void MacroAssembler::speculationBarrier() {
  // lfence is an SSE2-era instruction, so SSE2 must be present.
  MOZ_ASSERT(HasSSE2());
  masm.lfence();
}
// Floor (round toward -Infinity) the float32 in `src` into the int32 `dest`,
// jumping to `fail` when the result is not representable (negative zero,
// NaN, or outside int32 range).
//
// NOTE(review): this extract appears corrupted. The declaration of the
// local labels (`Label negative, end;`), the SSE4.1 vroundss fast path,
// and the `else` that should separate the non-SSE4.1 slow path all seem to
// have been lost, so `&negative` and `&end` below are unresolved and the
// slow path sits inside the HasSSE41() branch. TODO: restore from upstream
// before relying on this code.
void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  if (HasSSE41()) {
    // Fail on negative-zero.
    branchNegativeZeroFloat32(src, dest, fail);
    // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
    {
      ScratchFloat32Scope scratch(*this);
      zeroFloat32(scratch);
      branchFloat(Assembler::DoubleLessThan, src, scratch, &negative);
    }
    // Fail on negative-zero.
    branchNegativeZeroFloat32(src, dest, fail);
    // Input is non-negative, so truncation correctly rounds.
    truncateFloat32ToInt32(src, dest, fail);
    jump(&end);
    // Input is negative, but isn't -0.
    // Negative values go on a comparatively expensive path, since no
    // native rounding mode matches JS semantics. Still better than callVM.
    bind(&negative);
    {
      // Truncate and round toward zero.
      // This is off-by-one for everything but integer-valued inputs.
      //
      // Directly call vcvttss2si instead of truncateFloat32ToInt32 because
      // we want to perform failure handling ourselves.
      vcvttss2si(src, dest);
      // Test whether the input double was integer-valued.
      {
        ScratchFloat32Scope scratch(*this);
        convertInt32ToFloat32(dest, scratch);
        branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
      }
      // Input is not integer-valued, so we rounded off-by-one in the
      // wrong direction. Correct by subtraction.
      //
      // Overflows if vcvttss2si returned the failure return value INT_MIN.
      branchSub32(Assembler::Overflow, Imm32(1), dest, fail);
    }
    bind(&end);
  }
}
void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
Label* fail) { if (HasSSE41()) { // Fail on negative-zero.
branchNegativeZero(src, dest, fail);
// Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
{
ScratchDoubleScope scratch(*this);
zeroDouble(scratch);
branchDouble(Assembler::DoubleLessThan, src, scratch, &negative);
}
// Fail on negative-zero.
branchNegativeZero(src, dest, fail);
// Input is non-negative, so truncation correctly rounds.
truncateDoubleToInt32(src, dest, fail);
jump(&end);
// Input is negative, but isn't -0. // Negative values go on a comparatively expensive path, since no // native rounding mode matches JS semantics. Still better than callVM.
bind(&negative);
{ // Truncate and round toward zero. // This is off-by-one for everything but integer-valued inputs. // // Directly call vcvttsd2si instead of truncateDoubleToInt32 because we // want to perform failure handling ourselves.
vcvttsd2si(src, dest);
// Test whether the input double was integer-valued.
{
ScratchDoubleScope scratch(*this);
convertInt32ToDouble(dest, scratch);
branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
}
// Input is not integer-valued, so we rounded off-by-one in the // wrong direction. Correct by subtraction. // // Overflows if vcvttsd2si returned the failure return value INT_MIN.
branchSub32(Assembler::Overflow, Imm32(1), dest, fail);
}
// If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. // Fail if x > -1 and the sign bit is set.
loadConstantFloat32(-1.f, scratch);
branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
&lessThanOrEqualMinusOne);
vmovmskps(src, dest);
branchTest32(Assembler::NonZero, dest, Imm32(1), fail);
if (HasSSE41()) { // x <= -1 or x > -0
bind(&lessThanOrEqualMinusOne); // Round toward +Infinity.
vroundss(X86Encoding::RoundUp, src, scratch);
truncateFloat32ToInt32(scratch, dest, fail); return;
}
// No SSE4.1
Label end;
// x >= 0 and x is not -0.0. We can truncate integer values, and truncate and // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, // as the truncate operation will return INT_MIN and we'll fail.
truncateFloat32ToInt32(src, dest, fail);
convertInt32ToFloat32(dest, scratch);
branchFloat(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
// Input is not integer-valued, add 1 to obtain the ceiling value. // If input > INT_MAX, output == INT_MAX so adding 1 will overflow.
branchAdd32(Assembler::Overflow, Imm32(1), dest, fail);
jump(&end);
// x <= -1, truncation is the way to go.
bind(&lessThanOrEqualMinusOne);
truncateFloat32ToInt32(src, dest, fail);
// If x is in ]-1,0], ceil(x) is -0, which cannot be represented as an int32. // Fail if x > -1 and the sign bit is set.
loadConstantDouble(-1.0, scratch);
branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, src, scratch,
&lessThanOrEqualMinusOne);
vmovmskpd(src, dest);
branchTest32(Assembler::NonZero, dest, Imm32(1), fail);
if (HasSSE41()) { // x <= -1 or x > -0
bind(&lessThanOrEqualMinusOne); // Round toward +Infinity.
vroundsd(X86Encoding::RoundUp, src, scratch);
truncateDoubleToInt32(scratch, dest, fail); return;
}
// No SSE4.1
Label end;
// x >= 0 and x is not -0.0. We can truncate integer values, and truncate and // add 1 to non-integer values. This will also work for values >= INT_MAX + 1, // as the truncate operation will return INT_MIN and we'll fail.
truncateDoubleToInt32(src, dest, fail);
convertInt32ToDouble(dest, scratch);
branchDouble(Assembler::DoubleEqualOrUnordered, src, scratch, &end);
// Input is not integer-valued, add 1 to obtain the ceiling value.
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.56 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.