4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* On other hosts, prefer the float32/64_* soft-float primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* noinline keeps the rarely-taken soft-float slow path out of hot callers */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 s
->float_exception_flags
|= float_flag_overflow
;
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 s
->float_exception_flags
|= float_flag_overflow
;
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
463 typedef enum __attribute__ ((__packed__
)) {
464 float_class_unclassified
,
468 float_class_qnan
, /* all NaNs from here */
472 /* Simple helpers for checking if, or what kind of, NaN we have */
473 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
475 return unlikely(c
>= float_class_qnan
);
478 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
480 return c
== float_class_snan
;
483 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
485 return c
== float_class_qnan
;
489 * Structure holding all of the decomposed parts of a float. The
490 * exponent is unbiased and the fraction is normalized. All
491 * calculations are done with a 64 bit fraction and then rounded as
492 * appropriate for the final format.
494 * Thanks to the packed FloatClass a decent compiler should be able to
495 * fit the whole structure into registers and avoid using the stack
496 * for parameter passing.
506 #define DECOMPOSED_BINARY_POINT (64 - 2)
507 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
508 #define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
510 /* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based the size of fraction
517 * frac_lsb: least significant bit of fraction
518 * frac_lsbm1: the bit below the least significant bit (for rounding)
519 * round_mask/roundeven_mask: masks used for rounding
520 * The following optional modifiers are available:
521 * arm_althp: handle ARM Alternative Half Precision
532 uint64_t roundeven_mask
;
536 /* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
548 static const FloatFmt float16_params
= {
552 static const FloatFmt float16_params_ahp
= {
557 static const FloatFmt bfloat16_params
= {
561 static const FloatFmt float32_params
= {
565 static const FloatFmt float64_params
= {
569 /* Unpack a float to parts, but do not canonicalize. */
570 static inline FloatParts
unpack_raw(FloatFmt fmt
, uint64_t raw
)
572 const int sign_pos
= fmt
.frac_size
+ fmt
.exp_size
;
574 return (FloatParts
) {
575 .cls
= float_class_unclassified
,
576 .sign
= extract64(raw
, sign_pos
, 1),
577 .exp
= extract64(raw
, fmt
.frac_size
, fmt
.exp_size
),
578 .frac
= extract64(raw
, 0, fmt
.frac_size
),
582 static inline FloatParts
float16_unpack_raw(float16 f
)
584 return unpack_raw(float16_params
, f
);
587 static inline FloatParts
bfloat16_unpack_raw(bfloat16 f
)
589 return unpack_raw(bfloat16_params
, f
);
592 static inline FloatParts
float32_unpack_raw(float32 f
)
594 return unpack_raw(float32_params
, f
);
597 static inline FloatParts
float64_unpack_raw(float64 f
)
599 return unpack_raw(float64_params
, f
);
602 /* Pack a float from parts, but do not canonicalize. */
603 static inline uint64_t pack_raw(FloatFmt fmt
, FloatParts p
)
605 const int sign_pos
= fmt
.frac_size
+ fmt
.exp_size
;
606 uint64_t ret
= deposit64(p
.frac
, fmt
.frac_size
, fmt
.exp_size
, p
.exp
);
607 return deposit64(ret
, sign_pos
, 1, p
.sign
);
610 static inline float16
float16_pack_raw(FloatParts p
)
612 return make_float16(pack_raw(float16_params
, p
));
615 static inline bfloat16
bfloat16_pack_raw(FloatParts p
)
617 return pack_raw(bfloat16_params
, p
);
620 static inline float32
float32_pack_raw(FloatParts p
)
622 return make_float32(pack_raw(float32_params
, p
));
625 static inline float64
float64_pack_raw(FloatParts p
)
627 return make_float64(pack_raw(float64_params
, p
));
630 /*----------------------------------------------------------------------------
631 | Functions and definitions to determine: (1) whether tininess for underflow
632 | is detected before or after rounding by default, (2) what (if anything)
633 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
634 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
635 | are propagated from function inputs to output. These details are target-
637 *----------------------------------------------------------------------------*/
638 #include "softfloat-specialize.c.inc"
640 /* Canonicalize EXP and FRAC, setting CLS. */
641 static FloatParts
sf_canonicalize(FloatParts part
, const FloatFmt
*parm
,
642 float_status
*status
)
644 if (part
.exp
== parm
->exp_max
&& !parm
->arm_althp
) {
645 if (part
.frac
== 0) {
646 part
.cls
= float_class_inf
;
648 part
.frac
<<= parm
->frac_shift
;
649 part
.cls
= (parts_is_snan_frac(part
.frac
, status
)
650 ? float_class_snan
: float_class_qnan
);
652 } else if (part
.exp
== 0) {
653 if (likely(part
.frac
== 0)) {
654 part
.cls
= float_class_zero
;
655 } else if (status
->flush_inputs_to_zero
) {
656 float_raise(float_flag_input_denormal
, status
);
657 part
.cls
= float_class_zero
;
660 int shift
= clz64(part
.frac
) - 1;
661 part
.cls
= float_class_normal
;
662 part
.exp
= parm
->frac_shift
- parm
->exp_bias
- shift
+ 1;
666 part
.cls
= float_class_normal
;
667 part
.exp
-= parm
->exp_bias
;
668 part
.frac
= DECOMPOSED_IMPLICIT_BIT
+ (part
.frac
<< parm
->frac_shift
);
673 /* Round and uncanonicalize a floating-point number by parts. There
674 * are FRAC_SHIFT bits that may require rounding at the bottom of the
675 * fraction; these bits will be removed. The exponent will be biased
676 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
679 static FloatParts
round_canonical(FloatParts p
, float_status
*s
,
680 const FloatFmt
*parm
)
682 const uint64_t frac_lsb
= parm
->frac_lsb
;
683 const uint64_t frac_lsbm1
= parm
->frac_lsbm1
;
684 const uint64_t round_mask
= parm
->round_mask
;
685 const uint64_t roundeven_mask
= parm
->roundeven_mask
;
686 const int exp_max
= parm
->exp_max
;
687 const int frac_shift
= parm
->frac_shift
;
696 case float_class_normal
:
697 switch (s
->float_rounding_mode
) {
698 case float_round_nearest_even
:
699 overflow_norm
= false;
700 inc
= ((frac
& roundeven_mask
) != frac_lsbm1 ? frac_lsbm1
: 0);
702 case float_round_ties_away
:
703 overflow_norm
= false;
706 case float_round_to_zero
:
707 overflow_norm
= true;
711 inc
= p
.sign ?
0 : round_mask
;
712 overflow_norm
= p
.sign
;
714 case float_round_down
:
715 inc
= p
.sign ? round_mask
: 0;
716 overflow_norm
= !p
.sign
;
718 case float_round_to_odd
:
719 overflow_norm
= true;
720 inc
= frac
& frac_lsb ?
0 : round_mask
;
723 g_assert_not_reached();
726 exp
+= parm
->exp_bias
;
727 if (likely(exp
> 0)) {
728 if (frac
& round_mask
) {
729 flags
|= float_flag_inexact
;
731 if (frac
& DECOMPOSED_OVERFLOW_BIT
) {
738 if (parm
->arm_althp
) {
739 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
740 if (unlikely(exp
> exp_max
)) {
741 /* Overflow. Return the maximum normal. */
742 flags
= float_flag_invalid
;
746 } else if (unlikely(exp
>= exp_max
)) {
747 flags
|= float_flag_overflow
| float_flag_inexact
;
752 p
.cls
= float_class_inf
;
756 } else if (s
->flush_to_zero
) {
757 flags
|= float_flag_output_denormal
;
758 p
.cls
= float_class_zero
;
761 bool is_tiny
= s
->tininess_before_rounding
763 || !((frac
+ inc
) & DECOMPOSED_OVERFLOW_BIT
);
765 shift64RightJamming(frac
, 1 - exp
, &frac
);
766 if (frac
& round_mask
) {
767 /* Need to recompute round-to-even. */
768 switch (s
->float_rounding_mode
) {
769 case float_round_nearest_even
:
770 inc
= ((frac
& roundeven_mask
) != frac_lsbm1
773 case float_round_to_odd
:
774 inc
= frac
& frac_lsb ?
0 : round_mask
;
779 flags
|= float_flag_inexact
;
783 exp
= (frac
& DECOMPOSED_IMPLICIT_BIT ?
1 : 0);
786 if (is_tiny
&& (flags
& float_flag_inexact
)) {
787 flags
|= float_flag_underflow
;
789 if (exp
== 0 && frac
== 0) {
790 p
.cls
= float_class_zero
;
795 case float_class_zero
:
801 case float_class_inf
:
803 assert(!parm
->arm_althp
);
808 case float_class_qnan
:
809 case float_class_snan
:
810 assert(!parm
->arm_althp
);
812 frac
>>= parm
->frac_shift
;
816 g_assert_not_reached();
819 float_raise(flags
, s
);
825 /* Explicit FloatFmt version */
826 static FloatParts
float16a_unpack_canonical(float16 f
, float_status
*s
,
827 const FloatFmt
*params
)
829 return sf_canonicalize(float16_unpack_raw(f
), params
, s
);
832 static FloatParts
float16_unpack_canonical(float16 f
, float_status
*s
)
834 return float16a_unpack_canonical(f
, s
, &float16_params
);
837 static FloatParts
bfloat16_unpack_canonical(bfloat16 f
, float_status
*s
)
839 return sf_canonicalize(bfloat16_unpack_raw(f
), &bfloat16_params
, s
);
842 static float16
float16a_round_pack_canonical(FloatParts p
, float_status
*s
,
843 const FloatFmt
*params
)
845 return float16_pack_raw(round_canonical(p
, s
, params
));
848 static float16
float16_round_pack_canonical(FloatParts p
, float_status
*s
)
850 return float16a_round_pack_canonical(p
, s
, &float16_params
);
853 static bfloat16
bfloat16_round_pack_canonical(FloatParts p
, float_status
*s
)
855 return bfloat16_pack_raw(round_canonical(p
, s
, &bfloat16_params
));
858 static FloatParts
float32_unpack_canonical(float32 f
, float_status
*s
)
860 return sf_canonicalize(float32_unpack_raw(f
), &float32_params
, s
);
863 static float32
float32_round_pack_canonical(FloatParts p
, float_status
*s
)
865 return float32_pack_raw(round_canonical(p
, s
, &float32_params
));
868 static FloatParts
float64_unpack_canonical(float64 f
, float_status
*s
)
870 return sf_canonicalize(float64_unpack_raw(f
), &float64_params
, s
);
873 static float64
float64_round_pack_canonical(FloatParts p
, float_status
*s
)
875 return float64_pack_raw(round_canonical(p
, s
, &float64_params
));
878 static FloatParts
return_nan(FloatParts a
, float_status
*s
)
881 case float_class_snan
:
882 s
->float_exception_flags
|= float_flag_invalid
;
883 a
= parts_silence_nan(a
, s
);
885 case float_class_qnan
:
886 if (s
->default_nan_mode
) {
887 return parts_default_nan(s
);
892 g_assert_not_reached();
897 static FloatParts
pick_nan(FloatParts a
, FloatParts b
, float_status
*s
)
899 if (is_snan(a
.cls
) || is_snan(b
.cls
)) {
900 s
->float_exception_flags
|= float_flag_invalid
;
903 if (s
->default_nan_mode
) {
904 return parts_default_nan(s
);
906 if (pickNaN(a
.cls
, b
.cls
,
908 (a
.frac
== b
.frac
&& a
.sign
< b
.sign
), s
)) {
911 if (is_snan(a
.cls
)) {
912 return parts_silence_nan(a
, s
);
918 static FloatParts
pick_nan_muladd(FloatParts a
, FloatParts b
, FloatParts c
,
919 bool inf_zero
, float_status
*s
)
923 if (is_snan(a
.cls
) || is_snan(b
.cls
) || is_snan(c
.cls
)) {
924 s
->float_exception_flags
|= float_flag_invalid
;
927 which
= pickNaNMulAdd(a
.cls
, b
.cls
, c
.cls
, inf_zero
, s
);
929 if (s
->default_nan_mode
) {
930 /* Note that this check is after pickNaNMulAdd so that function
931 * has an opportunity to set the Invalid flag.
946 return parts_default_nan(s
);
948 g_assert_not_reached();
951 if (is_snan(a
.cls
)) {
952 return parts_silence_nan(a
, s
);
958 * Returns the result of adding or subtracting the values of the
959 * floating-point values `a' and `b'. The operation is performed
960 * according to the IEC/IEEE Standard for Binary Floating-Point
964 static FloatParts
addsub_floats(FloatParts a
, FloatParts b
, bool subtract
,
967 bool a_sign
= a
.sign
;
968 bool b_sign
= b
.sign
^ subtract
;
970 if (a_sign
!= b_sign
) {
973 if (a
.cls
== float_class_normal
&& b
.cls
== float_class_normal
) {
974 if (a
.exp
> b
.exp
|| (a
.exp
== b
.exp
&& a
.frac
>= b
.frac
)) {
975 shift64RightJamming(b
.frac
, a
.exp
- b
.exp
, &b
.frac
);
976 a
.frac
= a
.frac
- b
.frac
;
978 shift64RightJamming(a
.frac
, b
.exp
- a
.exp
, &a
.frac
);
979 a
.frac
= b
.frac
- a
.frac
;
985 a
.cls
= float_class_zero
;
986 a
.sign
= s
->float_rounding_mode
== float_round_down
;
988 int shift
= clz64(a
.frac
) - 1;
989 a
.frac
= a
.frac
<< shift
;
990 a
.exp
= a
.exp
- shift
;
995 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
996 return pick_nan(a
, b
, s
);
998 if (a
.cls
== float_class_inf
) {
999 if (b
.cls
== float_class_inf
) {
1000 float_raise(float_flag_invalid
, s
);
1001 return parts_default_nan(s
);
1005 if (a
.cls
== float_class_zero
&& b
.cls
== float_class_zero
) {
1006 a
.sign
= s
->float_rounding_mode
== float_round_down
;
1009 if (a
.cls
== float_class_zero
|| b
.cls
== float_class_inf
) {
1010 b
.sign
= a_sign
^ 1;
1013 if (b
.cls
== float_class_zero
) {
1018 if (a
.cls
== float_class_normal
&& b
.cls
== float_class_normal
) {
1019 if (a
.exp
> b
.exp
) {
1020 shift64RightJamming(b
.frac
, a
.exp
- b
.exp
, &b
.frac
);
1021 } else if (a
.exp
< b
.exp
) {
1022 shift64RightJamming(a
.frac
, b
.exp
- a
.exp
, &a
.frac
);
1026 if (a
.frac
& DECOMPOSED_OVERFLOW_BIT
) {
1027 shift64RightJamming(a
.frac
, 1, &a
.frac
);
1032 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
1033 return pick_nan(a
, b
, s
);
1035 if (a
.cls
== float_class_inf
|| b
.cls
== float_class_zero
) {
1038 if (b
.cls
== float_class_inf
|| a
.cls
== float_class_zero
) {
1043 g_assert_not_reached();
/*
 * Returns the result of adding or subtracting the floating-point
 * values `a' and `b'. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
1052 float16 QEMU_FLATTEN
float16_add(float16 a
, float16 b
, float_status
*status
)
1054 FloatParts pa
= float16_unpack_canonical(a
, status
);
1055 FloatParts pb
= float16_unpack_canonical(b
, status
);
1056 FloatParts pr
= addsub_floats(pa
, pb
, false, status
);
1058 return float16_round_pack_canonical(pr
, status
);
1061 float16 QEMU_FLATTEN
float16_sub(float16 a
, float16 b
, float_status
*status
)
1063 FloatParts pa
= float16_unpack_canonical(a
, status
);
1064 FloatParts pb
= float16_unpack_canonical(b
, status
);
1065 FloatParts pr
= addsub_floats(pa
, pb
, true, status
);
1067 return float16_round_pack_canonical(pr
, status
);
1070 static float32 QEMU_SOFTFLOAT_ATTR
1071 soft_f32_addsub(float32 a
, float32 b
, bool subtract
, float_status
*status
)
1073 FloatParts pa
= float32_unpack_canonical(a
, status
);
1074 FloatParts pb
= float32_unpack_canonical(b
, status
);
1075 FloatParts pr
= addsub_floats(pa
, pb
, subtract
, status
);
1077 return float32_round_pack_canonical(pr
, status
);
1080 static inline float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1082 return soft_f32_addsub(a
, b
, false, status
);
1085 static inline float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1087 return soft_f32_addsub(a
, b
, true, status
);
1090 static float64 QEMU_SOFTFLOAT_ATTR
1091 soft_f64_addsub(float64 a
, float64 b
, bool subtract
, float_status
*status
)
1093 FloatParts pa
= float64_unpack_canonical(a
, status
);
1094 FloatParts pb
= float64_unpack_canonical(b
, status
);
1095 FloatParts pr
= addsub_floats(pa
, pb
, subtract
, status
);
1097 return float64_round_pack_canonical(pr
, status
);
1100 static inline float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1102 return soft_f64_addsub(a
, b
, false, status
);
1105 static inline float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1107 return soft_f64_addsub(a
, b
, true, status
);
/* Native single-precision addition on the host FPU. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
/* Native single-precision subtraction on the host FPU. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
/* Native double-precision addition on the host FPU. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
/* Native double-precision subtraction on the host FPU. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1130 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1132 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1133 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1135 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1138 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1140 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1141 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1143 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1147 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1148 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1150 return float32_gen2(a
, b
, s
, hard
, soft
,
1151 f32_is_zon2
, f32_addsubmul_post
);
1154 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1155 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1157 return float64_gen2(a
, b
, s
, hard
, soft
,
1158 f64_is_zon2
, f64_addsubmul_post
);
1161 float32 QEMU_FLATTEN
1162 float32_add(float32 a
, float32 b
, float_status
*s
)
1164 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1167 float32 QEMU_FLATTEN
1168 float32_sub(float32 a
, float32 b
, float_status
*s
)
1170 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1173 float64 QEMU_FLATTEN
1174 float64_add(float64 a
, float64 b
, float_status
*s
)
1176 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1179 float64 QEMU_FLATTEN
1180 float64_sub(float64 a
, float64 b
, float_status
*s
)
1182 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1186 * Returns the result of adding or subtracting the bfloat16
1187 * values `a' and `b'.
1189 bfloat16 QEMU_FLATTEN
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1191 FloatParts pa
= bfloat16_unpack_canonical(a
, status
);
1192 FloatParts pb
= bfloat16_unpack_canonical(b
, status
);
1193 FloatParts pr
= addsub_floats(pa
, pb
, false, status
);
1195 return bfloat16_round_pack_canonical(pr
, status
);
1198 bfloat16 QEMU_FLATTEN
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1200 FloatParts pa
= bfloat16_unpack_canonical(a
, status
);
1201 FloatParts pb
= bfloat16_unpack_canonical(b
, status
);
1202 FloatParts pr
= addsub_floats(pa
, pb
, true, status
);
1204 return bfloat16_round_pack_canonical(pr
, status
);
1208 * Returns the result of multiplying the floating-point values `a' and
1209 * `b'. The operation is performed according to the IEC/IEEE Standard
1210 * for Binary Floating-Point Arithmetic.
1213 static FloatParts
mul_floats(FloatParts a
, FloatParts b
, float_status
*s
)
1215 bool sign
= a
.sign
^ b
.sign
;
1217 if (a
.cls
== float_class_normal
&& b
.cls
== float_class_normal
) {
1219 int exp
= a
.exp
+ b
.exp
;
1221 mul64To128(a
.frac
, b
.frac
, &hi
, &lo
);
1222 shift128RightJamming(hi
, lo
, DECOMPOSED_BINARY_POINT
, &hi
, &lo
);
1223 if (lo
& DECOMPOSED_OVERFLOW_BIT
) {
1224 shift64RightJamming(lo
, 1, &lo
);
1234 /* handle all the NaN cases */
1235 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
1236 return pick_nan(a
, b
, s
);
1238 /* Inf * Zero == NaN */
1239 if ((a
.cls
== float_class_inf
&& b
.cls
== float_class_zero
) ||
1240 (a
.cls
== float_class_zero
&& b
.cls
== float_class_inf
)) {
1241 s
->float_exception_flags
|= float_flag_invalid
;
1242 return parts_default_nan(s
);
1244 /* Multiply by 0 or Inf */
1245 if (a
.cls
== float_class_inf
|| a
.cls
== float_class_zero
) {
1249 if (b
.cls
== float_class_inf
|| b
.cls
== float_class_zero
) {
1253 g_assert_not_reached();
1256 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1258 FloatParts pa
= float16_unpack_canonical(a
, status
);
1259 FloatParts pb
= float16_unpack_canonical(b
, status
);
1260 FloatParts pr
= mul_floats(pa
, pb
, status
);
1262 return float16_round_pack_canonical(pr
, status
);
1265 static float32 QEMU_SOFTFLOAT_ATTR
1266 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1268 FloatParts pa
= float32_unpack_canonical(a
, status
);
1269 FloatParts pb
= float32_unpack_canonical(b
, status
);
1270 FloatParts pr
= mul_floats(pa
, pb
, status
);
1272 return float32_round_pack_canonical(pr
, status
);
1275 static float64 QEMU_SOFTFLOAT_ATTR
1276 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1278 FloatParts pa
= float64_unpack_canonical(a
, status
);
1279 FloatParts pb
= float64_unpack_canonical(b
, status
);
1280 FloatParts pr
= mul_floats(pa
, pb
, status
);
1282 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU multiply helpers for the hardfloat fast path.  */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1295 float32 QEMU_FLATTEN
1296 float32_mul(float32 a
, float32 b
, float_status
*s
)
1298 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1299 f32_is_zon2
, f32_addsubmul_post
);
1302 float64 QEMU_FLATTEN
1303 float64_mul(float64 a
, float64 b
, float_status
*s
)
1305 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1306 f64_is_zon2
, f64_addsubmul_post
);
1310 * Returns the result of multiplying the bfloat16
1311 * values `a' and `b'.
1314 bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1316 FloatParts pa
= bfloat16_unpack_canonical(a
, status
);
1317 FloatParts pb
= bfloat16_unpack_canonical(b
, status
);
1318 FloatParts pr
= mul_floats(pa
, pb
, status
);
1320 return bfloat16_round_pack_canonical(pr
, status
);
1324 * Returns the result of multiplying the floating-point values `a' and
1325 * `b' then adding 'c', with no intermediate rounding step after the
1326 * multiplication. The operation is performed according to the
1327 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1328 * The flags argument allows the caller to select negation of the
1329 * addend, the intermediate product, or the final result. (The
1330 * difference between this and having the caller do a separate
1331 * negation is that negating externally will flip the sign bit on
1335 static FloatParts
muladd_floats(FloatParts a
, FloatParts b
, FloatParts c
,
1336 int flags
, float_status
*s
)
1338 bool inf_zero
= ((1 << a
.cls
) | (1 << b
.cls
)) ==
1339 ((1 << float_class_inf
) | (1 << float_class_zero
));
1341 bool sign_flip
= flags
& float_muladd_negate_result
;
1346 /* It is implementation-defined whether the cases of (0,inf,qnan)
1347 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1348 * they return if they do), so we have to hand this information
1349 * off to the target-specific pick-a-NaN routine.
1351 if (is_nan(a
.cls
) || is_nan(b
.cls
) || is_nan(c
.cls
)) {
1352 return pick_nan_muladd(a
, b
, c
, inf_zero
, s
);
1356 s
->float_exception_flags
|= float_flag_invalid
;
1357 return parts_default_nan(s
);
1360 if (flags
& float_muladd_negate_c
) {
1364 p_sign
= a
.sign
^ b
.sign
;
1366 if (flags
& float_muladd_negate_product
) {
1370 if (a
.cls
== float_class_inf
|| b
.cls
== float_class_inf
) {
1371 p_class
= float_class_inf
;
1372 } else if (a
.cls
== float_class_zero
|| b
.cls
== float_class_zero
) {
1373 p_class
= float_class_zero
;
1375 p_class
= float_class_normal
;
1378 if (c
.cls
== float_class_inf
) {
1379 if (p_class
== float_class_inf
&& p_sign
!= c
.sign
) {
1380 s
->float_exception_flags
|= float_flag_invalid
;
1381 return parts_default_nan(s
);
1383 a
.cls
= float_class_inf
;
1384 a
.sign
= c
.sign
^ sign_flip
;
1389 if (p_class
== float_class_inf
) {
1390 a
.cls
= float_class_inf
;
1391 a
.sign
= p_sign
^ sign_flip
;
1395 if (p_class
== float_class_zero
) {
1396 if (c
.cls
== float_class_zero
) {
1397 if (p_sign
!= c
.sign
) {
1398 p_sign
= s
->float_rounding_mode
== float_round_down
;
1401 } else if (flags
& float_muladd_halve_result
) {
1404 c
.sign
^= sign_flip
;
1408 /* a & b should be normals now... */
1409 assert(a
.cls
== float_class_normal
&&
1410 b
.cls
== float_class_normal
);
1412 p_exp
= a
.exp
+ b
.exp
;
1414 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1417 mul64To128(a
.frac
, b
.frac
, &hi
, &lo
);
1418 /* binary point now at bit 124 */
1420 /* check for overflow */
1421 if (hi
& (1ULL << (DECOMPOSED_BINARY_POINT
* 2 + 1 - 64))) {
1422 shift128RightJamming(hi
, lo
, 1, &hi
, &lo
);
1427 if (c
.cls
== float_class_zero
) {
1428 /* move binary point back to 62 */
1429 shift128RightJamming(hi
, lo
, DECOMPOSED_BINARY_POINT
, &hi
, &lo
);
1431 int exp_diff
= p_exp
- c
.exp
;
1432 if (p_sign
== c
.sign
) {
1434 if (exp_diff
<= 0) {
1435 shift128RightJamming(hi
, lo
,
1436 DECOMPOSED_BINARY_POINT
- exp_diff
,
1441 uint64_t c_hi
, c_lo
;
1442 /* shift c to the same binary point as the product (124) */
1445 shift128RightJamming(c_hi
, c_lo
,
1448 add128(hi
, lo
, c_hi
, c_lo
, &hi
, &lo
);
1449 /* move binary point back to 62 */
1450 shift128RightJamming(hi
, lo
, DECOMPOSED_BINARY_POINT
, &hi
, &lo
);
1453 if (lo
& DECOMPOSED_OVERFLOW_BIT
) {
1454 shift64RightJamming(lo
, 1, &lo
);
1460 uint64_t c_hi
, c_lo
;
1461 /* make C binary point match product at bit 124 */
1465 if (exp_diff
<= 0) {
1466 shift128RightJamming(hi
, lo
, -exp_diff
, &hi
, &lo
);
1469 (hi
> c_hi
|| (hi
== c_hi
&& lo
>= c_lo
))) {
1470 sub128(hi
, lo
, c_hi
, c_lo
, &hi
, &lo
);
1472 sub128(c_hi
, c_lo
, hi
, lo
, &hi
, &lo
);
1477 shift128RightJamming(c_hi
, c_lo
,
1480 sub128(hi
, lo
, c_hi
, c_lo
, &hi
, &lo
);
1483 if (hi
== 0 && lo
== 0) {
1484 a
.cls
= float_class_zero
;
1485 a
.sign
= s
->float_rounding_mode
== float_round_down
;
1486 a
.sign
^= sign_flip
;
1493 shift
= clz64(lo
) + 64;
1495 /* Normalizing to a binary point of 124 is the
1496 correct adjust for the exponent. However since we're
1497 shifting, we might as well put the binary point back
1498 at 62 where we really want it. Therefore shift as
1499 if we're leaving 1 bit at the top of the word, but
1500 adjust the exponent as if we're leaving 3 bits. */
1503 lo
= lo
<< (shift
- 64);
1505 hi
= (hi
<< shift
) | (lo
>> (64 - shift
));
1506 lo
= hi
| ((lo
<< shift
) != 0);
1513 if (flags
& float_muladd_halve_result
) {
1517 /* finally prepare our result */
1518 a
.cls
= float_class_normal
;
1519 a
.sign
= p_sign
^ sign_flip
;
1526 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1527 int flags
, float_status
*status
)
1529 FloatParts pa
= float16_unpack_canonical(a
, status
);
1530 FloatParts pb
= float16_unpack_canonical(b
, status
);
1531 FloatParts pc
= float16_unpack_canonical(c
, status
);
1532 FloatParts pr
= muladd_floats(pa
, pb
, pc
, flags
, status
);
1534 return float16_round_pack_canonical(pr
, status
);
1537 static float32 QEMU_SOFTFLOAT_ATTR
1538 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1539 float_status
*status
)
1541 FloatParts pa
= float32_unpack_canonical(a
, status
);
1542 FloatParts pb
= float32_unpack_canonical(b
, status
);
1543 FloatParts pc
= float32_unpack_canonical(c
, status
);
1544 FloatParts pr
= muladd_floats(pa
, pb
, pc
, flags
, status
);
1546 return float32_round_pack_canonical(pr
, status
);
1549 static float64 QEMU_SOFTFLOAT_ATTR
1550 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1551 float_status
*status
)
1553 FloatParts pa
= float64_unpack_canonical(a
, status
);
1554 FloatParts pb
= float64_unpack_canonical(b
, status
);
1555 FloatParts pc
= float64_unpack_canonical(c
, status
);
1556 FloatParts pr
= muladd_floats(pa
, pb
, pc
, flags
, status
);
1558 return float64_round_pack_canonical(pr
, status
);
1561 static bool force_soft_fma
;
1563 float32 QEMU_FLATTEN
1564 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
1566 union_float32 ua
, ub
, uc
, ur
;
1572 if (unlikely(!can_use_fpu(s
))) {
1575 if (unlikely(flags
& float_muladd_halve_result
)) {
1579 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1580 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
1584 if (unlikely(force_soft_fma
)) {
1589 * When (a || b) == 0, there's no need to check for under/over flow,
1590 * since we know the addend is (normal || 0) and the product is 0.
1592 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
1596 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
1597 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1598 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
1600 if (flags
& float_muladd_negate_c
) {
1605 union_float32 ua_orig
= ua
;
1606 union_float32 uc_orig
= uc
;
1608 if (flags
& float_muladd_negate_product
) {
1611 if (flags
& float_muladd_negate_c
) {
1615 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
1617 if (unlikely(f32_is_inf(ur
))) {
1618 s
->float_exception_flags
|= float_flag_overflow
;
1619 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
1625 if (flags
& float_muladd_negate_result
) {
1626 return float32_chs(ur
.s
);
1631 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1634 float64 QEMU_FLATTEN
1635 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
1637 union_float64 ua
, ub
, uc
, ur
;
1643 if (unlikely(!can_use_fpu(s
))) {
1646 if (unlikely(flags
& float_muladd_halve_result
)) {
1650 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
1651 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
1655 if (unlikely(force_soft_fma
)) {
1660 * When (a || b) == 0, there's no need to check for under/over flow,
1661 * since we know the addend is (normal || 0) and the product is 0.
1663 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
1667 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
1668 prod_sign
^= !!(flags
& float_muladd_negate_product
);
1669 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
1671 if (flags
& float_muladd_negate_c
) {
1676 union_float64 ua_orig
= ua
;
1677 union_float64 uc_orig
= uc
;
1679 if (flags
& float_muladd_negate_product
) {
1682 if (flags
& float_muladd_negate_c
) {
1686 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
1688 if (unlikely(f64_is_inf(ur
))) {
1689 s
->float_exception_flags
|= float_flag_overflow
;
1690 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
1696 if (flags
& float_muladd_negate_result
) {
1697 return float64_chs(ur
.s
);
1702 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
1706 * Returns the result of multiplying the bfloat16 values `a'
1707 * and `b' then adding 'c', with no intermediate rounding step after the
1711 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
1712 int flags
, float_status
*status
)
1714 FloatParts pa
= bfloat16_unpack_canonical(a
, status
);
1715 FloatParts pb
= bfloat16_unpack_canonical(b
, status
);
1716 FloatParts pc
= bfloat16_unpack_canonical(c
, status
);
1717 FloatParts pr
= muladd_floats(pa
, pb
, pc
, flags
, status
);
1719 return bfloat16_round_pack_canonical(pr
, status
);
1723 * Returns the result of dividing the floating-point value `a' by the
1724 * corresponding value `b'. The operation is performed according to
1725 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1728 static FloatParts
div_floats(FloatParts a
, FloatParts b
, float_status
*s
)
1730 bool sign
= a
.sign
^ b
.sign
;
1732 if (a
.cls
== float_class_normal
&& b
.cls
== float_class_normal
) {
1733 uint64_t n0
, n1
, q
, r
;
1734 int exp
= a
.exp
- b
.exp
;
1737 * We want a 2*N / N-bit division to produce exactly an N-bit
1738 * result, so that we do not lose any precision and so that we
1739 * do not have to renormalize afterward. If A.frac < B.frac,
1740 * then division would produce an (N-1)-bit result; shift A left
1741 * by one to produce the an N-bit result, and decrement the
1742 * exponent to match.
1744 * The udiv_qrnnd algorithm that we're using requires normalization,
1745 * i.e. the msb of the denominator must be set. Since we know that
1746 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1747 * by one (more), and the remainder must be shifted right by one.
1749 if (a
.frac
< b
.frac
) {
1751 shift128Left(0, a
.frac
, DECOMPOSED_BINARY_POINT
+ 2, &n1
, &n0
);
1753 shift128Left(0, a
.frac
, DECOMPOSED_BINARY_POINT
+ 1, &n1
, &n0
);
1755 q
= udiv_qrnnd(&r
, n1
, n0
, b
.frac
<< 1);
1758 * Set lsb if there is a remainder, to set inexact.
1759 * As mentioned above, to find the actual value of the remainder we
1760 * would need to shift right, but (1) we are only concerned about
1761 * non-zero-ness, and (2) the remainder will always be even because
1762 * both inputs to the division primitive are even.
1764 a
.frac
= q
| (r
!= 0);
1769 /* handle all the NaN cases */
1770 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
1771 return pick_nan(a
, b
, s
);
1773 /* 0/0 or Inf/Inf */
1776 (a
.cls
== float_class_inf
|| a
.cls
== float_class_zero
)) {
1777 s
->float_exception_flags
|= float_flag_invalid
;
1778 return parts_default_nan(s
);
1780 /* Inf / x or 0 / x */
1781 if (a
.cls
== float_class_inf
|| a
.cls
== float_class_zero
) {
1786 if (b
.cls
== float_class_zero
) {
1787 s
->float_exception_flags
|= float_flag_divbyzero
;
1788 a
.cls
= float_class_inf
;
1793 if (b
.cls
== float_class_inf
) {
1794 a
.cls
= float_class_zero
;
1798 g_assert_not_reached();
1801 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
1803 FloatParts pa
= float16_unpack_canonical(a
, status
);
1804 FloatParts pb
= float16_unpack_canonical(b
, status
);
1805 FloatParts pr
= div_floats(pa
, pb
, status
);
1807 return float16_round_pack_canonical(pr
, status
);
1810 static float32 QEMU_SOFTFLOAT_ATTR
1811 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
1813 FloatParts pa
= float32_unpack_canonical(a
, status
);
1814 FloatParts pb
= float32_unpack_canonical(b
, status
);
1815 FloatParts pr
= div_floats(pa
, pb
, status
);
1817 return float32_round_pack_canonical(pr
, status
);
1820 static float64 QEMU_SOFTFLOAT_ATTR
1821 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
1823 FloatParts pa
= float64_unpack_canonical(a
, status
);
1824 FloatParts pb
= float64_unpack_canonical(b
, status
);
1825 FloatParts pr
= div_floats(pa
, pb
, status
);
1827 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU division helpers for the hardfloat fast path.  */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}
1840 static bool f32_div_pre(union_float32 a
, union_float32 b
)
1842 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1843 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
1844 fpclassify(b
.h
) == FP_NORMAL
;
1846 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
1849 static bool f64_div_pre(union_float64 a
, union_float64 b
)
1851 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1852 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
1853 fpclassify(b
.h
) == FP_NORMAL
;
1855 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
1858 static bool f32_div_post(union_float32 a
, union_float32 b
)
1860 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1861 return fpclassify(a
.h
) != FP_ZERO
;
1863 return !float32_is_zero(a
.s
);
1866 static bool f64_div_post(union_float64 a
, union_float64 b
)
1868 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1869 return fpclassify(a
.h
) != FP_ZERO
;
1871 return !float64_is_zero(a
.s
);
1874 float32 QEMU_FLATTEN
1875 float32_div(float32 a
, float32 b
, float_status
*s
)
1877 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
1878 f32_div_pre
, f32_div_post
);
1881 float64 QEMU_FLATTEN
1882 float64_div(float64 a
, float64 b
, float_status
*s
)
1884 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
1885 f64_div_pre
, f64_div_post
);
1889 * Returns the result of dividing the bfloat16
1890 * value `a' by the corresponding value `b'.
1893 bfloat16
bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
1895 FloatParts pa
= bfloat16_unpack_canonical(a
, status
);
1896 FloatParts pb
= bfloat16_unpack_canonical(b
, status
);
1897 FloatParts pr
= div_floats(pa
, pb
, status
);
1899 return bfloat16_round_pack_canonical(pr
, status
);
1903 * Float to Float conversions
1905 * Returns the result of converting one float format to another. The
1906 * conversion is performed according to the IEC/IEEE Standard for
1907 * Binary Floating-Point Arithmetic.
1909 * The float_to_float helper only needs to take care of raising
1910 * invalid exceptions and handling the conversion on NaNs.
1913 static FloatParts
float_to_float(FloatParts a
, const FloatFmt
*dstf
,
1916 if (dstf
->arm_althp
) {
1918 case float_class_qnan
:
1919 case float_class_snan
:
1920 /* There is no NaN in the destination format. Raise Invalid
1921 * and return a zero with the sign of the input NaN.
1923 s
->float_exception_flags
|= float_flag_invalid
;
1924 a
.cls
= float_class_zero
;
1929 case float_class_inf
:
1930 /* There is no Inf in the destination format. Raise Invalid
1931 * and return the maximum normal with the correct sign.
1933 s
->float_exception_flags
|= float_flag_invalid
;
1934 a
.cls
= float_class_normal
;
1935 a
.exp
= dstf
->exp_max
;
1936 a
.frac
= ((1ull << dstf
->frac_size
) - 1) << dstf
->frac_shift
;
1942 } else if (is_nan(a
.cls
)) {
1943 if (is_snan(a
.cls
)) {
1944 s
->float_exception_flags
|= float_flag_invalid
;
1945 a
= parts_silence_nan(a
, s
);
1947 if (s
->default_nan_mode
) {
1948 return parts_default_nan(s
);
1954 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
1956 const FloatFmt
*fmt16
= ieee ?
&float16_params
: &float16_params_ahp
;
1957 FloatParts p
= float16a_unpack_canonical(a
, s
, fmt16
);
1958 FloatParts pr
= float_to_float(p
, &float32_params
, s
);
1959 return float32_round_pack_canonical(pr
, s
);
1962 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
1964 const FloatFmt
*fmt16
= ieee ?
&float16_params
: &float16_params_ahp
;
1965 FloatParts p
= float16a_unpack_canonical(a
, s
, fmt16
);
1966 FloatParts pr
= float_to_float(p
, &float64_params
, s
);
1967 return float64_round_pack_canonical(pr
, s
);
1970 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
1972 const FloatFmt
*fmt16
= ieee ?
&float16_params
: &float16_params_ahp
;
1973 FloatParts p
= float32_unpack_canonical(a
, s
);
1974 FloatParts pr
= float_to_float(p
, fmt16
, s
);
1975 return float16a_round_pack_canonical(pr
, s
, fmt16
);
1978 static float64 QEMU_SOFTFLOAT_ATTR
1979 soft_float32_to_float64(float32 a
, float_status
*s
)
1981 FloatParts p
= float32_unpack_canonical(a
, s
);
1982 FloatParts pr
= float_to_float(p
, &float64_params
, s
);
1983 return float64_round_pack_canonical(pr
, s
);
1986 float64
float32_to_float64(float32 a
, float_status
*s
)
1988 if (likely(float32_is_normal(a
))) {
1989 /* Widening conversion can never produce inexact results. */
1995 } else if (float32_is_zero(a
)) {
1996 return float64_set_sign(float64_zero
, float32_is_neg(a
));
1998 return soft_float32_to_float64(a
, s
);
2002 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2004 const FloatFmt
*fmt16
= ieee ?
&float16_params
: &float16_params_ahp
;
2005 FloatParts p
= float64_unpack_canonical(a
, s
);
2006 FloatParts pr
= float_to_float(p
, fmt16
, s
);
2007 return float16a_round_pack_canonical(pr
, s
, fmt16
);
2010 float32
float64_to_float32(float64 a
, float_status
*s
)
2012 FloatParts p
= float64_unpack_canonical(a
, s
);
2013 FloatParts pr
= float_to_float(p
, &float32_params
, s
);
2014 return float32_round_pack_canonical(pr
, s
);
2017 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2019 FloatParts p
= bfloat16_unpack_canonical(a
, s
);
2020 FloatParts pr
= float_to_float(p
, &float32_params
, s
);
2021 return float32_round_pack_canonical(pr
, s
);
2024 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2026 FloatParts p
= bfloat16_unpack_canonical(a
, s
);
2027 FloatParts pr
= float_to_float(p
, &float64_params
, s
);
2028 return float64_round_pack_canonical(pr
, s
);
2031 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2033 FloatParts p
= float32_unpack_canonical(a
, s
);
2034 FloatParts pr
= float_to_float(p
, &bfloat16_params
, s
);
2035 return bfloat16_round_pack_canonical(pr
, s
);
2038 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2040 FloatParts p
= float64_unpack_canonical(a
, s
);
2041 FloatParts pr
= float_to_float(p
, &bfloat16_params
, s
);
2042 return bfloat16_round_pack_canonical(pr
, s
);
2046 * Rounds the floating-point value `a' to an integer, and returns the
2047 * result as a floating-point value. The operation is performed
2048 * according to the IEC/IEEE Standard for Binary Floating-Point
2052 static FloatParts
round_to_int(FloatParts a
, FloatRoundMode rmode
,
2053 int scale
, float_status
*s
)
2056 case float_class_qnan
:
2057 case float_class_snan
:
2058 return return_nan(a
, s
);
2060 case float_class_zero
:
2061 case float_class_inf
:
2062 /* already "integral" */
2065 case float_class_normal
:
2066 scale
= MIN(MAX(scale
, -0x10000), 0x10000);
2069 if (a
.exp
>= DECOMPOSED_BINARY_POINT
) {
2070 /* already integral */
2075 /* all fractional */
2076 s
->float_exception_flags
|= float_flag_inexact
;
2078 case float_round_nearest_even
:
2079 one
= a
.exp
== -1 && a
.frac
> DECOMPOSED_IMPLICIT_BIT
;
2081 case float_round_ties_away
:
2082 one
= a
.exp
== -1 && a
.frac
>= DECOMPOSED_IMPLICIT_BIT
;
2084 case float_round_to_zero
:
2087 case float_round_up
:
2090 case float_round_down
:
2093 case float_round_to_odd
:
2097 g_assert_not_reached();
2101 a
.frac
= DECOMPOSED_IMPLICIT_BIT
;
2104 a
.cls
= float_class_zero
;
2107 uint64_t frac_lsb
= DECOMPOSED_IMPLICIT_BIT
>> a
.exp
;
2108 uint64_t frac_lsbm1
= frac_lsb
>> 1;
2109 uint64_t rnd_even_mask
= (frac_lsb
- 1) | frac_lsb
;
2110 uint64_t rnd_mask
= rnd_even_mask
>> 1;
2114 case float_round_nearest_even
:
2115 inc
= ((a
.frac
& rnd_even_mask
) != frac_lsbm1 ? frac_lsbm1
: 0);
2117 case float_round_ties_away
:
2120 case float_round_to_zero
:
2123 case float_round_up
:
2124 inc
= a
.sign ?
0 : rnd_mask
;
2126 case float_round_down
:
2127 inc
= a
.sign ? rnd_mask
: 0;
2129 case float_round_to_odd
:
2130 inc
= a
.frac
& frac_lsb ?
0 : rnd_mask
;
2133 g_assert_not_reached();
2136 if (a
.frac
& rnd_mask
) {
2137 s
->float_exception_flags
|= float_flag_inexact
;
2139 a
.frac
&= ~rnd_mask
;
2140 if (a
.frac
& DECOMPOSED_OVERFLOW_BIT
) {
2148 g_assert_not_reached();
2153 float16
float16_round_to_int(float16 a
, float_status
*s
)
2155 FloatParts pa
= float16_unpack_canonical(a
, s
);
2156 FloatParts pr
= round_to_int(pa
, s
->float_rounding_mode
, 0, s
);
2157 return float16_round_pack_canonical(pr
, s
);
2160 float32
float32_round_to_int(float32 a
, float_status
*s
)
2162 FloatParts pa
= float32_unpack_canonical(a
, s
);
2163 FloatParts pr
= round_to_int(pa
, s
->float_rounding_mode
, 0, s
);
2164 return float32_round_pack_canonical(pr
, s
);
2167 float64
float64_round_to_int(float64 a
, float_status
*s
)
2169 FloatParts pa
= float64_unpack_canonical(a
, s
);
2170 FloatParts pr
= round_to_int(pa
, s
->float_rounding_mode
, 0, s
);
2171 return float64_round_pack_canonical(pr
, s
);
2175 * Rounds the bfloat16 value `a' to an integer, and returns the
2176 * result as a bfloat16 value.
2179 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2181 FloatParts pa
= bfloat16_unpack_canonical(a
, s
);
2182 FloatParts pr
= round_to_int(pa
, s
->float_rounding_mode
, 0, s
);
2183 return bfloat16_round_pack_canonical(pr
, s
);
2187 * Returns the result of converting the floating-point value `a' to
2188 * the two's complement integer format. The conversion is performed
2189 * according to the IEC/IEEE Standard for Binary Floating-Point
2190 * Arithmetic---which means in particular that the conversion is
2191 * rounded according to the current rounding mode. If `a' is a NaN,
2192 * the largest positive integer is returned. Otherwise, if the
2193 * conversion overflows, the largest integer with the same sign as `a'
2197 static int64_t round_to_int_and_pack(FloatParts in
, FloatRoundMode rmode
,
2198 int scale
, int64_t min
, int64_t max
,
2202 int orig_flags
= get_float_exception_flags(s
);
2203 FloatParts p
= round_to_int(in
, rmode
, scale
, s
);
2206 case float_class_snan
:
2207 case float_class_qnan
:
2208 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2210 case float_class_inf
:
2211 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2212 return p
.sign ? min
: max
;
2213 case float_class_zero
:
2215 case float_class_normal
:
2216 if (p
.exp
< DECOMPOSED_BINARY_POINT
) {
2217 r
= p
.frac
>> (DECOMPOSED_BINARY_POINT
- p
.exp
);
2218 } else if (p
.exp
- DECOMPOSED_BINARY_POINT
< 2) {
2219 r
= p
.frac
<< (p
.exp
- DECOMPOSED_BINARY_POINT
);
2224 if (r
<= -(uint64_t) min
) {
2227 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2234 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2239 g_assert_not_reached();
2243 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2246 return round_to_int_and_pack(float16_unpack_canonical(a
, s
),
2247 rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2250 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2253 return round_to_int_and_pack(float16_unpack_canonical(a
, s
),
2254 rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2257 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2260 return round_to_int_and_pack(float16_unpack_canonical(a
, s
),
2261 rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2264 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2267 return round_to_int_and_pack(float16_unpack_canonical(a
, s
),
2268 rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2271 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2274 return round_to_int_and_pack(float32_unpack_canonical(a
, s
),
2275 rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2278 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2281 return round_to_int_and_pack(float32_unpack_canonical(a
, s
),
2282 rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2285 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2288 return round_to_int_and_pack(float32_unpack_canonical(a
, s
),
2289 rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2292 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2295 return round_to_int_and_pack(float64_unpack_canonical(a
, s
),
2296 rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2299 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2302 return round_to_int_and_pack(float64_unpack_canonical(a
, s
),
2303 rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2306 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2309 return round_to_int_and_pack(float64_unpack_canonical(a
, s
),
2310 rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2313 int8_t float16_to_int8(float16 a
, float_status
*s
)
2315 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2318 int16_t float16_to_int16(float16 a
, float_status
*s
)
2320 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2323 int32_t float16_to_int32(float16 a
, float_status
*s
)
2325 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2328 int64_t float16_to_int64(float16 a
, float_status
*s
)
2330 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2333 int16_t float32_to_int16(float32 a
, float_status
*s
)
2335 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2338 int32_t float32_to_int32(float32 a
, float_status
*s
)
2340 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2343 int64_t float32_to_int64(float32 a
, float_status
*s
)
2345 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2348 int16_t float64_to_int16(float64 a
, float_status
*s
)
2350 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2353 int32_t float64_to_int32(float64 a
, float_status
*s
)
2355 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2358 int64_t float64_to_int64(float64 a
, float_status
*s
)
2360 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2363 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2365 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2368 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2370 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2373 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2375 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2378 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2380 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2383 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2385 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2388 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2390 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2393 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2395 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2398 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2400 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2403 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2405 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2409 * Returns the result of converting the floating-point value `a' to
2410 * the two's complement integer format.
2413 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2416 return round_to_int_and_pack(bfloat16_unpack_canonical(a
, s
),
2417 rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2420 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2423 return round_to_int_and_pack(bfloat16_unpack_canonical(a
, s
),
2424 rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2427 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2430 return round_to_int_and_pack(bfloat16_unpack_canonical(a
, s
),
2431 rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2434 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2436 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2439 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2441 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2444 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2446 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2449 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2451 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2454 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2456 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2459 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2461 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2465 * Returns the result of converting the floating-point value `a' to
2466 * the unsigned integer format. The conversion is performed according
2467 * to the IEC/IEEE Standard for Binary Floating-Point
2468 * Arithmetic---which means in particular that the conversion is
2469 * rounded according to the current rounding mode. If `a' is a NaN,
2470 * the largest unsigned integer is returned. Otherwise, if the
2471 * conversion overflows, the largest unsigned integer is returned. If
2472 * `a' is negative, the result is rounded and zero is returned;
2473 * values that do not round to zero will raise the inexact exception
2477 static uint64_t round_to_uint_and_pack(FloatParts in
, FloatRoundMode rmode
,
2478 int scale
, uint64_t max
,
2481 int orig_flags
= get_float_exception_flags(s
);
2482 FloatParts p
= round_to_int(in
, rmode
, scale
, s
);
2486 case float_class_snan
:
2487 case float_class_qnan
:
2488 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2490 case float_class_inf
:
2491 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2492 return p
.sign ?
0 : max
;
2493 case float_class_zero
:
2495 case float_class_normal
:
2497 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2501 if (p
.exp
< DECOMPOSED_BINARY_POINT
) {
2502 r
= p
.frac
>> (DECOMPOSED_BINARY_POINT
- p
.exp
);
2503 } else if (p
.exp
- DECOMPOSED_BINARY_POINT
< 2) {
2504 r
= p
.frac
<< (p
.exp
- DECOMPOSED_BINARY_POINT
);
2506 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2510 /* For uint64 this will never trip, but if p.exp is too large
2511 * to shift a decomposed fraction we shall have exited via the
2515 s
->float_exception_flags
= orig_flags
| float_flag_invalid
;
2520 g_assert_not_reached();
2524 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2527 return round_to_uint_and_pack(float16_unpack_canonical(a
, s
),
2528 rmode
, scale
, UINT8_MAX
, s
);
2531 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2534 return round_to_uint_and_pack(float16_unpack_canonical(a
, s
),
2535 rmode
, scale
, UINT16_MAX
, s
);
2538 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2541 return round_to_uint_and_pack(float16_unpack_canonical(a
, s
),
2542 rmode
, scale
, UINT32_MAX
, s
);
2545 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2548 return round_to_uint_and_pack(float16_unpack_canonical(a
, s
),
2549 rmode
, scale
, UINT64_MAX
, s
);
2552 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2555 return round_to_uint_and_pack(float32_unpack_canonical(a
, s
),
2556 rmode
, scale
, UINT16_MAX
, s
);
2559 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2562 return round_to_uint_and_pack(float32_unpack_canonical(a
, s
),
2563 rmode
, scale
, UINT32_MAX
, s
);
2566 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2569 return round_to_uint_and_pack(float32_unpack_canonical(a
, s
),
2570 rmode
, scale
, UINT64_MAX
, s
);
2573 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2576 return round_to_uint_and_pack(float64_unpack_canonical(a
, s
),
2577 rmode
, scale
, UINT16_MAX
, s
);
2580 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2583 return round_to_uint_and_pack(float64_unpack_canonical(a
, s
),
2584 rmode
, scale
, UINT32_MAX
, s
);
2587 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2590 return round_to_uint_and_pack(float64_unpack_canonical(a
, s
),
2591 rmode
, scale
, UINT64_MAX
, s
);
2594 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
2596 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2599 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
2601 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2604 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
2606 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2609 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
2611 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2614 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
2616 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2619 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
2621 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2624 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
2626 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2629 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
2631 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2634 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
2636 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2639 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
2641 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2644 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
2646 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2649 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
2651 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2654 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
2656 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2659 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
2661 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2664 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
2666 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2669 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
2671 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2674 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
2676 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2679 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
2681 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2684 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
2686 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2690 * Returns the result of converting the bfloat16 value `a' to
2691 * the unsigned integer format.
2694 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2695 int scale
, float_status
*s
)
2697 return round_to_uint_and_pack(bfloat16_unpack_canonical(a
, s
),
2698 rmode
, scale
, UINT16_MAX
, s
);
2701 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2702 int scale
, float_status
*s
)
2704 return round_to_uint_and_pack(bfloat16_unpack_canonical(a
, s
),
2705 rmode
, scale
, UINT32_MAX
, s
);
2708 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2709 int scale
, float_status
*s
)
2711 return round_to_uint_and_pack(bfloat16_unpack_canonical(a
, s
),
2712 rmode
, scale
, UINT64_MAX
, s
);
2715 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
2717 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2720 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
2722 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2725 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
2727 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2730 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
2732 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
2735 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
2737 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
2740 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
2742 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
2746 * Integer to float conversions
2748 * Returns the result of converting the two's complement integer `a'
2749 * to the floating-point format. The conversion is performed according
2750 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2753 static FloatParts
int_to_float(int64_t a
, int scale
, float_status
*status
)
2755 FloatParts r
= { .sign
= false };
2758 r
.cls
= float_class_zero
;
2763 r
.cls
= float_class_normal
;
2768 shift
= clz64(f
) - 1;
2769 scale
= MIN(MAX(scale
, -0x10000), 0x10000);
2771 r
.exp
= DECOMPOSED_BINARY_POINT
- shift
+ scale
;
2772 r
.frac
= (shift
< 0 ? DECOMPOSED_IMPLICIT_BIT
: f
<< shift
);
2778 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
2780 FloatParts pa
= int_to_float(a
, scale
, status
);
2781 return float16_round_pack_canonical(pa
, status
);
2784 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
2786 return int64_to_float16_scalbn(a
, scale
, status
);
2789 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
2791 return int64_to_float16_scalbn(a
, scale
, status
);
2794 float16
int64_to_float16(int64_t a
, float_status
*status
)
2796 return int64_to_float16_scalbn(a
, 0, status
);
2799 float16
int32_to_float16(int32_t a
, float_status
*status
)
2801 return int64_to_float16_scalbn(a
, 0, status
);
2804 float16
int16_to_float16(int16_t a
, float_status
*status
)
2806 return int64_to_float16_scalbn(a
, 0, status
);
2809 float16
int8_to_float16(int8_t a
, float_status
*status
)
2811 return int64_to_float16_scalbn(a
, 0, status
);
2814 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
2816 FloatParts pa
= int_to_float(a
, scale
, status
);
2817 return float32_round_pack_canonical(pa
, status
);
2820 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
2822 return int64_to_float32_scalbn(a
, scale
, status
);
2825 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
2827 return int64_to_float32_scalbn(a
, scale
, status
);
2830 float32
int64_to_float32(int64_t a
, float_status
*status
)
2832 return int64_to_float32_scalbn(a
, 0, status
);
2835 float32
int32_to_float32(int32_t a
, float_status
*status
)
2837 return int64_to_float32_scalbn(a
, 0, status
);
2840 float32
int16_to_float32(int16_t a
, float_status
*status
)
2842 return int64_to_float32_scalbn(a
, 0, status
);
2845 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
2847 FloatParts pa
= int_to_float(a
, scale
, status
);
2848 return float64_round_pack_canonical(pa
, status
);
2851 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
2853 return int64_to_float64_scalbn(a
, scale
, status
);
2856 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
2858 return int64_to_float64_scalbn(a
, scale
, status
);
2861 float64
int64_to_float64(int64_t a
, float_status
*status
)
2863 return int64_to_float64_scalbn(a
, 0, status
);
2866 float64
int32_to_float64(int32_t a
, float_status
*status
)
2868 return int64_to_float64_scalbn(a
, 0, status
);
2871 float64
int16_to_float64(int16_t a
, float_status
*status
)
2873 return int64_to_float64_scalbn(a
, 0, status
);
2877 * Returns the result of converting the two's complement integer `a'
2878 * to the bfloat16 format.
2881 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
2883 FloatParts pa
= int_to_float(a
, scale
, status
);
2884 return bfloat16_round_pack_canonical(pa
, status
);
2887 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
2889 return int64_to_bfloat16_scalbn(a
, scale
, status
);
2892 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
2894 return int64_to_bfloat16_scalbn(a
, scale
, status
);
2897 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
2899 return int64_to_bfloat16_scalbn(a
, 0, status
);
2902 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
2904 return int64_to_bfloat16_scalbn(a
, 0, status
);
2907 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
2909 return int64_to_bfloat16_scalbn(a
, 0, status
);
2913 * Unsigned Integer to float conversions
2915 * Returns the result of converting the unsigned integer `a' to the
2916 * floating-point format. The conversion is performed according to the
2917 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2920 static FloatParts
uint_to_float(uint64_t a
, int scale
, float_status
*status
)
2922 FloatParts r
= { .sign
= false };
2925 r
.cls
= float_class_zero
;
2927 scale
= MIN(MAX(scale
, -0x10000), 0x10000);
2928 r
.cls
= float_class_normal
;
2929 if ((int64_t)a
< 0) {
2930 r
.exp
= DECOMPOSED_BINARY_POINT
+ 1 + scale
;
2931 shift64RightJamming(a
, 1, &a
);
2934 int shift
= clz64(a
) - 1;
2935 r
.exp
= DECOMPOSED_BINARY_POINT
- shift
+ scale
;
2936 r
.frac
= a
<< shift
;
2943 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
2945 FloatParts pa
= uint_to_float(a
, scale
, status
);
2946 return float16_round_pack_canonical(pa
, status
);
2949 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
2951 return uint64_to_float16_scalbn(a
, scale
, status
);
2954 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
2956 return uint64_to_float16_scalbn(a
, scale
, status
);
2959 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
2961 return uint64_to_float16_scalbn(a
, 0, status
);
2964 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
2966 return uint64_to_float16_scalbn(a
, 0, status
);
2969 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
2971 return uint64_to_float16_scalbn(a
, 0, status
);
2974 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
2976 return uint64_to_float16_scalbn(a
, 0, status
);
2979 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
2981 FloatParts pa
= uint_to_float(a
, scale
, status
);
2982 return float32_round_pack_canonical(pa
, status
);
2985 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
2987 return uint64_to_float32_scalbn(a
, scale
, status
);
2990 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
2992 return uint64_to_float32_scalbn(a
, scale
, status
);
2995 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
2997 return uint64_to_float32_scalbn(a
, 0, status
);
3000 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3002 return uint64_to_float32_scalbn(a
, 0, status
);
3005 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3007 return uint64_to_float32_scalbn(a
, 0, status
);
3010 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3012 FloatParts pa
= uint_to_float(a
, scale
, status
);
3013 return float64_round_pack_canonical(pa
, status
);
3016 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3018 return uint64_to_float64_scalbn(a
, scale
, status
);
3021 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3023 return uint64_to_float64_scalbn(a
, scale
, status
);
3026 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3028 return uint64_to_float64_scalbn(a
, 0, status
);
3031 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3033 return uint64_to_float64_scalbn(a
, 0, status
);
3036 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3038 return uint64_to_float64_scalbn(a
, 0, status
);
3042 * Returns the result of converting the unsigned integer `a' to the
3046 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3048 FloatParts pa
= uint_to_float(a
, scale
, status
);
3049 return bfloat16_round_pack_canonical(pa
, status
);
3052 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3054 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3057 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3059 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3062 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3064 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3067 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3069 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3072 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3074 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3078 /* min() and max() functions. These can't be implemented as
3079 * 'compare and pick one input' because that would mishandle
3080 * NaNs and +0 vs -0.
3082 * minnum() and maxnum() functions. These are similar to the min()
3083 * and max() functions but if one of the arguments is a QNaN and
3084 * the other is numerical then the numerical argument is returned.
3085 * SNaNs will get quietened before being returned.
3086 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3087 * and maxNum() operations. min() and max() are the typical min/max
3088 * semantics provided by many CPUs which predate that specification.
3090 * minnummag() and maxnummag() functions correspond to minNumMag()
3091 * and maxNumMag() from the IEEE-754 2008.
3093 static FloatParts
minmax_floats(FloatParts a
, FloatParts b
, bool ismin
,
3094 bool ieee
, bool ismag
, float_status
*s
)
3096 if (unlikely(is_nan(a
.cls
) || is_nan(b
.cls
))) {
3098 /* Takes two floating-point values `a' and `b', one of
3099 * which is a NaN, and returns the appropriate NaN
3100 * result. If either `a' or `b' is a signaling NaN,
3101 * the invalid exception is raised.
3103 if (is_snan(a
.cls
) || is_snan(b
.cls
)) {
3104 return pick_nan(a
, b
, s
);
3105 } else if (is_nan(a
.cls
) && !is_nan(b
.cls
)) {
3107 } else if (is_nan(b
.cls
) && !is_nan(a
.cls
)) {
3111 return pick_nan(a
, b
, s
);
3116 case float_class_normal
:
3119 case float_class_inf
:
3122 case float_class_zero
:
3126 g_assert_not_reached();
3130 case float_class_normal
:
3133 case float_class_inf
:
3136 case float_class_zero
:
3140 g_assert_not_reached();
3144 if (ismag
&& (a_exp
!= b_exp
|| a
.frac
!= b
.frac
)) {
3145 bool a_less
= a_exp
< b_exp
;
3146 if (a_exp
== b_exp
) {
3147 a_less
= a
.frac
< b
.frac
;
3149 return a_less
^ ismin ? b
: a
;
3152 if (a
.sign
== b
.sign
) {
3153 bool a_less
= a_exp
< b_exp
;
3154 if (a_exp
== b_exp
) {
3155 a_less
= a
.frac
< b
.frac
;
3157 return a
.sign
^ a_less
^ ismin ? b
: a
;
3159 return a
.sign
^ ismin ? b
: a
;
3164 #define MINMAX(sz, name, ismin, isiee, ismag) \
3165 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3168 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3169 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
3170 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3172 return float ## sz ## _round_pack_canonical(pr, s); \
3175 MINMAX(16, min
, true, false, false)
3176 MINMAX(16, minnum
, true, true, false)
3177 MINMAX(16, minnummag
, true, true, true)
3178 MINMAX(16, max
, false, false, false)
3179 MINMAX(16, maxnum
, false, true, false)
3180 MINMAX(16, maxnummag
, false, true, true)
3182 MINMAX(32, min
, true, false, false)
3183 MINMAX(32, minnum
, true, true, false)
3184 MINMAX(32, minnummag
, true, true, true)
3185 MINMAX(32, max
, false, false, false)
3186 MINMAX(32, maxnum
, false, true, false)
3187 MINMAX(32, maxnummag
, false, true, true)
3189 MINMAX(64, min
, true, false, false)
3190 MINMAX(64, minnum
, true, true, false)
3191 MINMAX(64, minnummag
, true, true, true)
3192 MINMAX(64, max
, false, false, false)
3193 MINMAX(64, maxnum
, false, true, false)
3194 MINMAX(64, maxnummag
, false, true, true)
3198 #define BF16_MINMAX(name, ismin, isiee, ismag) \
3199 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3201 FloatParts pa = bfloat16_unpack_canonical(a, s); \
3202 FloatParts pb = bfloat16_unpack_canonical(b, s); \
3203 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3205 return bfloat16_round_pack_canonical(pr, s); \
3208 BF16_MINMAX(min
, true, false, false)
3209 BF16_MINMAX(minnum
, true, true, false)
3210 BF16_MINMAX(minnummag
, true, true, true)
3211 BF16_MINMAX(max
, false, false, false)
3212 BF16_MINMAX(maxnum
, false, true, false)
3213 BF16_MINMAX(maxnummag
, false, true, true)
3217 /* Floating point compare */
3218 static FloatRelation
compare_floats(FloatParts a
, FloatParts b
, bool is_quiet
,
3221 if (is_nan(a
.cls
) || is_nan(b
.cls
)) {
3223 a
.cls
== float_class_snan
||
3224 b
.cls
== float_class_snan
) {
3225 s
->float_exception_flags
|= float_flag_invalid
;
3227 return float_relation_unordered
;
3230 if (a
.cls
== float_class_zero
) {
3231 if (b
.cls
== float_class_zero
) {
3232 return float_relation_equal
;
3234 return b
.sign ? float_relation_greater
: float_relation_less
;
3235 } else if (b
.cls
== float_class_zero
) {
3236 return a
.sign ? float_relation_less
: float_relation_greater
;
3239 /* The only really important thing about infinity is its sign. If
3240 * both are infinities the sign marks the smallest of the two.
3242 if (a
.cls
== float_class_inf
) {
3243 if ((b
.cls
== float_class_inf
) && (a
.sign
== b
.sign
)) {
3244 return float_relation_equal
;
3246 return a
.sign ? float_relation_less
: float_relation_greater
;
3247 } else if (b
.cls
== float_class_inf
) {
3248 return b
.sign ? float_relation_greater
: float_relation_less
;
3251 if (a
.sign
!= b
.sign
) {
3252 return a
.sign ? float_relation_less
: float_relation_greater
;
3255 if (a
.exp
== b
.exp
) {
3256 if (a
.frac
== b
.frac
) {
3257 return float_relation_equal
;
3260 return a
.frac
> b
.frac ?
3261 float_relation_less
: float_relation_greater
;
3263 return a
.frac
> b
.frac ?
3264 float_relation_greater
: float_relation_less
;
3268 return a
.exp
> b
.exp ? float_relation_less
: float_relation_greater
;
3270 return a
.exp
> b
.exp ? float_relation_greater
: float_relation_less
;
3275 #define COMPARE(name, attr, sz) \
3277 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
3279 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3280 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
3281 return compare_floats(pa, pb, is_quiet, s); \
3284 COMPARE(soft_f16_compare
, QEMU_FLATTEN
, 16)
3285 COMPARE(soft_f32_compare
, QEMU_SOFTFLOAT_ATTR
, 32)
3286 COMPARE(soft_f64_compare
, QEMU_SOFTFLOAT_ATTR
, 64)
3290 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3292 return soft_f16_compare(a
, b
, false, s
);
3295 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3297 return soft_f16_compare(a
, b
, true, s
);
3300 static FloatRelation QEMU_FLATTEN
3301 f32_compare(float32 xa
, float32 xb
, bool is_quiet
, float_status
*s
)
3303 union_float32 ua
, ub
;
3308 if (QEMU_NO_HARDFLOAT
) {
3312 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3313 if (isgreaterequal(ua
.h
, ub
.h
)) {
3314 if (isgreater(ua
.h
, ub
.h
)) {
3315 return float_relation_greater
;
3317 return float_relation_equal
;
3319 if (likely(isless(ua
.h
, ub
.h
))) {
3320 return float_relation_less
;
3322 /* The only condition remaining is unordered.
3323 * Fall through to set flags.
3326 return soft_f32_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3329 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3331 return f32_compare(a
, b
, false, s
);
3334 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3336 return f32_compare(a
, b
, true, s
);
3339 static FloatRelation QEMU_FLATTEN
3340 f64_compare(float64 xa
, float64 xb
, bool is_quiet
, float_status
*s
)
3342 union_float64 ua
, ub
;
3347 if (QEMU_NO_HARDFLOAT
) {
3351 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3352 if (isgreaterequal(ua
.h
, ub
.h
)) {
3353 if (isgreater(ua
.h
, ub
.h
)) {
3354 return float_relation_greater
;
3356 return float_relation_equal
;
3358 if (likely(isless(ua
.h
, ub
.h
))) {
3359 return float_relation_less
;
3361 /* The only condition remaining is unordered.
3362 * Fall through to set flags.
3365 return soft_f64_compare(ua
.s
, ub
.s
, is_quiet
, s
);
3368 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3370 return f64_compare(a
, b
, false, s
);
3373 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3375 return f64_compare(a
, b
, true, s
);
3378 static FloatRelation QEMU_FLATTEN
3379 soft_bf16_compare(bfloat16 a
, bfloat16 b
, bool is_quiet
, float_status
*s
)