// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include"jit/arm64/vixl/Utils-vixl.h"
#include <cstdio>
namespace vixl {
// The default NaN values (for FPCR.DN=1). constdouble kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000)); constfloat kFP32DefaultNaN = RawbitsToFloat(0x7fc00000); const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
case FP_INFINITE: return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
case FP_SUBNORMAL: { // Calculate shift required to put mantissa into the most-significant bits // of the destination mantissa. int shift = CountLeadingZeros(mantissa << (32 - 10));
// Adjust the exponent for the shift applied, and rebias.
exponent = exponent - shift + (-15 + 127); break;
}
case FP_NAN: if (IsSignallingNaN(value)) { if (exception != NULL) {
*exception = true;
}
} if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
// Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred entirely, except that the top // bit is forced to '1', making the result a quiet NaN. The unused // (low-order) payload bits are set to 0.
exponent = (1 << kFloatExponentBits) - 1;
// Increase bits in mantissa, making low-order bits 0.
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
mantissa |= 1 << 22; // Force a quiet NaN. break;
case FP_NORMAL: // Increase bits in mantissa, making low-order bits 0.
mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
float FPToFloat(double value,
FPRounding round_mode,
UseDefaultNaN DN, bool* exception) { // Only the FPTieEven rounding mode is implemented.
VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
USE(round_mode);
switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { if (exception != NULL) {
*exception = true;
}
} if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
// Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred as much as possible, except // that the top bit is forced to '1', making the result a quiet NaN.
uint64_t raw = DoubleToRawbits(value);
case FP_ZERO: case FP_INFINITE: { // In a C++ cast, any value representable in the target type will be // unchanged. This is always the case for +/-0.0 and infinities. returnstatic_cast<float>(value);
}
case FP_NORMAL: case FP_SUBNORMAL: { // Convert double-to-float as the processor would, assuming that FPCR.FZ // (flush-to-zero) is not set.
uint64_t raw = DoubleToRawbits(value); // Extract the IEEE-754 double components.
uint32_t sign = raw >> 63; // Extract the exponent and remove the IEEE-754 encoding bias.
int32_t exponent = static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; // Extract the mantissa and add the implicit '1' bit.
uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); if (std::fpclassify(value) == FP_NORMAL) {
mantissa |= (UINT64_C(1) << 52);
} return FPRoundToFloat(sign, exponent, mantissa, round_mode);
}
}
VIXL_UNREACHABLE(); return value;
}
// TODO: We should consider implementing a full FPToDouble(Float16) // conversion function (for performance reasons). double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) { // We can rely on implicit float to double conversion here. return FPToFloat(value, DN, exception);
}
double FPToDouble(float value, UseDefaultNaN DN, bool* exception) { switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { if (exception != NULL) {
*exception = true;
}
} if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
// Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred entirely, except that the top // bit is forced to '1', making the result a quiet NaN. The unused // (low-order) payload bits are set to 0.
uint32_t raw = FloatToRawbits(value);
uint64_t sign = raw >> 31;
uint64_t exponent = (1 << 11) - 1;
uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
payload <<= (52 - 23); // The unused low-order bits should be 0.
payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
case FP_ZERO: case FP_NORMAL: case FP_SUBNORMAL: case FP_INFINITE: { // All other inputs are preserved in a standard cast, because every value // representable using an IEEE-754 float is also representable using an // IEEE-754 double. returnstatic_cast<double>(value);
}
}
switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { if (exception != NULL) {
*exception = true;
}
} if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
// Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred as much as possible, except // that the top bit is forced to '1', making the result a quiet NaN.
uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
: Float16ToRawbits(kFP16NegativeInfinity);
result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
result |= (1 << 9); // Force a quiet NaN; return RawbitsToFloat16(result);
}
case FP_ZERO: return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
case FP_INFINITE: return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
case FP_NORMAL: case FP_SUBNORMAL: { // Convert float-to-half as the processor would, assuming that FPCR.FZ // (flush-to-zero) is not set.
// Add the implicit '1' bit to the mantissa.
mantissa += (1 << 23); return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
}
}
VIXL_UNREACHABLE(); return kFP16PositiveZero;
}
// Convert a double to a half-precision value as the processor would.
//
//  - round_mode: must be FPTieEven (asserted). It is passed on to
//    FPRoundToFloat16 for normal and subnormal inputs.
//  - DN: when equal to kUseDefaultNaN, every NaN input yields kFP16DefaultNaN.
//  - exception: optional (may be NULL). Set to true when the input is a
//    signalling NaN; it is never cleared here.
Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  // Split the input into its IEEE-754 double components. The exponent has the
  // encoding bias removed; the mantissa does not yet include the implicit '1'.
  uint64_t raw = DoubleToRawbits(value);
  uint32_t sign = static_cast<uint32_t>(raw >> 63);
  int32_t exponent =
      static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        if (exception != NULL) {
          *exception = true;
        }
      }
      if (DN == kUseDefaultNaN) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      // Starting from the infinity encoding supplies the sign and the all-ones
      // exponent field; the payload bits are then OR-ed in.
      uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
                                    : Float16ToRawbits(kFP16NegativeInfinity);
      result |= static_cast<uint16_t>(
          mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits));
      result |= (1 << 9);  // Force a quiet NaN.
      return RawbitsToFloat16(result);
    }

    case FP_ZERO:
      return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (UINT64_C(1) << 52);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  // Every fpclassify() category is handled above.
  VIXL_UNREACHABLE();
  return kFP16PositiveZero;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.