/* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2021 Evan Nemerson <evan@nemerson.com>
*/
/* Portable version which should work on pretty much any compiler. * Obviously you can't rely on compiler support for things like * conversion to/from 32-bit floats, so make sure you always use the * functions and macros in this file! * * The portable implementations are (heavily) based on CC0 code by * Fabian Giesen: <https://gist.github.com/rygorous/2156668> (see also * <https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/>). * I have basically just modified it to get rid of some UB (lots of * aliasing, right shifting a negative value), use fixed-width types,
* and work in C. */ #define SIMDE_FLOAT16_API_PORTABLE 1 /* _Float16, per C standard (TS 18661-3;
* <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1945.pdf>). */ #define SIMDE_FLOAT16_API_FLOAT16 2 /* clang >= 6.0 supports __fp16 as an interchange format on all * targets, but only allows you to use them for arguments and return * values on targets which have defined an ABI. We get around the * restriction by wrapping the __fp16 in a struct, but we can't do * that on Arm since it would break compatibility with the NEON F16
* functions. */ #define SIMDE_FLOAT16_API_FP16_NO_ABI 3 /* This is basically __fp16 as specified by Arm, where arugments and
* return values are raw __fp16 values not structs. */ #define SIMDE_FLOAT16_API_FP16 4
/* Choosing an implementation. This is a bit rough, but I don't have * any ideas on how to improve it. If you do, patches are definitely
* welcome. */ #if !defined(SIMDE_FLOAT16_API) #if 0 && !defined(__cplusplus) /* I haven't found a way to detect this. It seems like defining * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then * checking for defined(FLT16_MAX) should work, but both gcc and * clang will define the constants even if _Float16 is not
* supported. Ideas welcome. */ #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 #elifdefined(__ARM_FP16_FORMAT_IEEE) && defined(SIMDE_ARM_NEON_FP16) #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 #elifdefined(__FLT16_MIN__) && (defined(__clang__) && (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))) #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI #else #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE #endif #endif
/* Reinterpret -- you *generally* shouldn't need these, they're really * intended for internal use. However, on x86 half-precision floats
* get stuffed into a __m128i/__m256i, so it may be useful. */
/* NOTE all the integer compares in this function cast the operands * to signed values to help compilers vectorize to SSE2, which lacks * unsigned comparison instructions. This is fine since all
* operands are below 0x80000000 (we clear the sign bit). */
if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */
f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */
} else { /* (De)normalized number or zero */ if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ /* use a magic value to align our 10 mantissa bits at the bottom of * the float. as long as FP addition is round-to-nearest-even this
* just works. */
f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic));
/* and one integer subtract of the bias later, we have our final float! */
f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic);
} else {
uint32_t mant_odd = (f32u >> 13) & 1;
/* update exponent, rounding bias part 1 */
f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); /* rounding bias part 2 */
f32u += mant_odd; /* take the bits! */
f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13);
}
}
f16u |= sign >> 16;
res = simde_uint16_as_float16(f16u); #endif
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.