/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            s->float_exception_flags |= float_flag_input_denormal;      \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                             \
    static inline void name(soft_t *a, soft_t *b, float_status *s) \
    {                                                              \
        if (likely(!s->flush_inputs_to_zero)) {                    \
            return;                                                \
        }                                                          \
        soft_t ## _input_flush__nocheck(a, s);                     \
        soft_t ## _input_flush__nocheck(b, s);                     \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                        \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                         \
        if (likely(!s->flush_inputs_to_zero)) {                               \
            return;                                                           \
        }                                                                     \
        soft_t ## _input_flush__nocheck(a, s);                                \
        soft_t ## _input_flush__nocheck(b, s);                                \
        soft_t ## _input_flush__nocheck(c, s);                                \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}
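
/*
 * Worked example (editorial reading aid, not an extra code path): a typical
 * guest FP workload raises inexact on its first operation and rarely clears
 * it, so after one softfloat call the two checks above collapse into a
 * single likely() branch and subsequent adds/muls/divs can run on the
 * host FPU.
 */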

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;
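
/*
 * Reading aid: these unions reinterpret bits between the softfloat type
 * (.s) and the host type (.h). For example, assigning u.h = 1.5f makes
 * float32_val(u.s) read back as 0x3fc00000, the IEEE-754 encoding of 1.5
 * (example values, assuming the usual IEEE-754 host formats).
 */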

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

/* Note: @fast_test and @post can be NULL */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post,
             f32_check_fn fast_test, soft_f32_op2_fn fast_op)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }
    if (fast_test && fast_test(ua, ub)) {
        return fast_op(ua.s, ub.s, s);
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
        if (post == NULL || post(ua, ub)) {
            goto soft;
        }
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post,
             f64_check_fn fast_test, soft_f64_op2_fn fast_op)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }
    if (fast_test && fast_test(ua, ub)) {
        return fast_op(ua.s, ub.s, s);
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
        if (post == NULL || post(ua, ub)) {
            goto soft;
        }
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat16Frac(float16 a)
{
    return float16_val(a) & 0x3ff;
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat16Exp(float16 a)
{
    return (float16_val(a) >> 10) & 0x1f;
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat32Frac(float32 a)
{
    return float32_val(a) & 0x007FFFFF;
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat32Exp(float32 a)
{
    return (float32_val(a) >> 23) & 0xFF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline flag extractFloat32Sign(float32 a)
{
    return float32_val(a) >> 31;
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat64Frac(float64 a)
{
    return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat64Exp(float64 a)
{
    return (float64_val(a) >> 52) & 0x7FF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline flag extractFloat64Sign(float64 a)
{
    return float64_val(a) >> 63;
}

/*
 * Classify a floating point number. float_class_qnan and everything
 * after it in the enum is a NaN, so cls >= float_class_qnan tests for
 * any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float. The
 * exponent is unbiased and the fraction is normalized. All
 * calculations are done with a 64 bit fraction and then rounded as
 * appropriate for the final format.
 *
 * Thanks to the packed FloatClass a decent compiler should be able to
 * fit the whole structure into registers and avoid using the stack
 * for parameter passing.
 */

typedef struct {
    uint64_t frac;
    int32_t exp;
    FloatClass cls;
    bool sign;
} FloatParts;

#define DECOMPOSED_BINARY_POINT  (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT  (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT  (DECOMPOSED_IMPLICIT_BIT << 1)
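
/*
 * Bit layout of the 64-bit decomposed fraction (editorial reading aid):
 * bit 63 is DECOMPOSED_OVERFLOW_BIT, bit 62 is DECOMPOSED_IMPLICIT_BIT
 * (the integer bit of a normalized value), and bits 61..0 hold the
 * fraction. A normalized FloatParts.frac therefore lies in [2^62, 2^63).
 */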

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
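
/*
 * Worked example (editorial reading aid): for float32, E = 8 and F = 23,
 * so exp_bias = 127, exp_max = 255, frac_shift = 62 - 23 = 39,
 * frac_lsb = 1ull << 39, frac_lsbm1 = 1ull << 38,
 * round_mask = (1ull << 39) - 1 and roundeven_mask = (1ull << 40) - 1.
 */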

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* Unpack a float to parts, but do not canonicalize. */
static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
{
    const int sign_pos = fmt.frac_size + fmt.exp_size;

    return (FloatParts) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, sign_pos, 1),
        .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
        .frac = extract64(raw, 0, fmt.frac_size),
    };
}
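
/*
 * Worked example (editorial reading aid): float32 1.0f is 0x3f800000, so
 * unpack_raw(float32_params, 0x3f800000) yields sign = 0, exp = 0x7f
 * (127, still biased) and frac = 0 (no implicit bit yet);
 * sf_canonicalize() below supplies both adjustments.
 */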

static inline FloatParts float16_unpack_raw(float16 f)
{
    return unpack_raw(float16_params, f);
}

static inline FloatParts float32_unpack_raw(float32 f)
{
    return unpack_raw(float32_params, f);
}

static inline FloatParts float64_unpack_raw(float64 f)
{
    return unpack_raw(float64_params, f);
}

/* Pack a float from parts, but do not canonicalize. */
static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
{
    const int sign_pos = fmt.frac_size + fmt.exp_size;
    uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
    return deposit64(ret, sign_pos, 1, p.sign);
}

static inline float16 float16_pack_raw(FloatParts p)
{
    return make_float16(pack_raw(float16_params, p));
}

static inline float32 float32_pack_raw(FloatParts p)
{
    return make_float32(pack_raw(float32_params, p));
}

static inline float64 float64_pack_raw(FloatParts p)
{
    return make_float64(pack_raw(float64_params, p));
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.h"

/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
                                  float_status *status)
{
    if (part.exp == parm->exp_max && !parm->arm_althp) {
        if (part.frac == 0) {
            part.cls = float_class_inf;
        } else {
            part.frac <<= parm->frac_shift;
            part.cls = (parts_is_snan_frac(part.frac, status)
                        ? float_class_snan : float_class_qnan);
        }
    } else if (part.exp == 0) {
        if (likely(part.frac == 0)) {
            part.cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal, status);
            part.cls = float_class_zero;
            part.frac = 0;
        } else {
            int shift = clz64(part.frac) - 1;
            part.cls = float_class_normal;
            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
            part.frac <<= shift;
        }
    } else {
        part.cls = float_class_normal;
        part.exp -= parm->exp_bias;
        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
    }
    return part;
}
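
/*
 * Worked example (editorial reading aid): canonicalizing float32 1.0f
 * (sign = 0, exp = 127, frac = 0) takes the final branch above and
 * produces cls = float_class_normal, exp = 127 - 127 = 0 and
 * frac = DECOMPOSED_IMPLICIT_BIT, i.e. 1.0 * 2^0 in decomposed form.
 */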

/* Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [0, EXP_MAX-1].
 */

static FloatParts round_canonical(FloatParts p, float_status *s,
                                  const FloatFmt *parm)
{
    const uint64_t frac_lsb = parm->frac_lsb;
    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
    const uint64_t round_mask = parm->round_mask;
    const uint64_t roundeven_mask = parm->roundeven_mask;
    const int exp_max = parm->exp_max;
    const int frac_shift = parm->frac_shift;
    uint64_t frac, inc;
    int exp, flags = 0;
    bool overflow_norm;

    frac = p.frac;
    exp = p.exp;

    switch (p.cls) {
    case float_class_normal:
        switch (s->float_rounding_mode) {
        case float_round_nearest_even:
            overflow_norm = false;
            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
            break;
        case float_round_ties_away:
            overflow_norm = false;
            inc = frac_lsbm1;
            break;
        case float_round_to_zero:
            overflow_norm = true;
            inc = 0;
            break;
        case float_round_up:
            inc = p.sign ? 0 : round_mask;
            overflow_norm = p.sign;
            break;
        case float_round_down:
            inc = p.sign ? round_mask : 0;
            overflow_norm = !p.sign;
            break;
        case float_round_to_odd:
            overflow_norm = true;
            inc = frac & frac_lsb ? 0 : round_mask;
            break;
        default:
            g_assert_not_reached();
        }

        exp += parm->exp_bias;
        if (likely(exp > 0)) {
            if (frac & round_mask) {
                flags |= float_flag_inexact;
                frac += inc;
                if (frac & DECOMPOSED_OVERFLOW_BIT) {
                    frac >>= 1;
                    exp++;
                }
            }
            frac >>= frac_shift;

            if (parm->arm_althp) {
                /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
                if (unlikely(exp > exp_max)) {
                    /* Overflow. Return the maximum normal. */
                    flags = float_flag_invalid;
                    exp = exp_max;
                    frac = -1;
                }
            } else if (unlikely(exp >= exp_max)) {
                flags |= float_flag_overflow | float_flag_inexact;
                if (overflow_norm) {
                    exp = exp_max - 1;
                    frac = -1;
                } else {
                    p.cls = float_class_inf;
                    goto do_inf;
                }
            }
        } else if (s->flush_to_zero) {
            flags |= float_flag_output_denormal;
            p.cls = float_class_zero;
            goto do_zero;
        } else {
            bool is_tiny = (s->float_detect_tininess
                            == float_tininess_before_rounding)
                        || (exp < 0)
                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);

            shift64RightJamming(frac, 1 - exp, &frac);
            if (frac & round_mask) {
                /* Need to recompute round-to-even. */
                switch (s->float_rounding_mode) {
                case float_round_nearest_even:
                    inc = ((frac & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                    break;
                case float_round_to_odd:
                    inc = frac & frac_lsb ? 0 : round_mask;
                    break;
                }
                flags |= float_flag_inexact;
                frac += inc;
            }

            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
            frac >>= frac_shift;

            if (is_tiny && (flags & float_flag_inexact)) {
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac == 0) {
                p.cls = float_class_zero;
            }
        }
        break;

    case float_class_zero:
    do_zero:
        exp = 0;
        frac = 0;
        break;

    case float_class_inf:
    do_inf:
        assert(!parm->arm_althp);
        exp = exp_max;
        frac = 0;
        break;

    case float_class_qnan:
    case float_class_snan:
        assert(!parm->arm_althp);
        exp = exp_max;
        frac >>= parm->frac_shift;
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    p.exp = exp;
    p.frac = frac;
    return p;
}
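
/*
 * Worked rounding example (editorial reading aid): in round-nearest-even
 * for float32, round_mask covers the low 39 bits. A fraction whose bits
 * under roundeven_mask equal exactly frac_lsbm1 is a tie sitting on an
 * even lsb; it gets inc = 0 and truncates to the even neighbour.
 * Anything past the halfway point gets inc = frac_lsbm1 and rounds up.
 */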

/* Explicit FloatFmt version */
static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
                                            const FloatFmt *params)
{
    return sf_canonicalize(float16_unpack_raw(f), params, s);
}

static FloatParts float16_unpack_canonical(float16 f, float_status *s)
{
    return float16a_unpack_canonical(f, s, &float16_params);
}

static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
                                             const FloatFmt *params)
{
    return float16_pack_raw(round_canonical(p, s, params));
}

static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static FloatParts float32_unpack_canonical(float32 f, float_status *s)
{
    return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
}

static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
{
    return float32_pack_raw(round_canonical(p, s, &float32_params));
}

static FloatParts float64_unpack_canonical(float64 f, float_status *s)
{
    return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
}

static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
{
    return float64_pack_raw(round_canonical(p, s, &float64_params));
}

static FloatParts return_nan(FloatParts a, float_status *s)
{
    switch (a.cls) {
    case float_class_snan:
        s->float_exception_flags |= float_flag_invalid;
        a = parts_silence_nan(a, s);
        /* fall through */
    case float_class_qnan:
        if (s->default_nan_mode) {
            return parts_default_nan(s);
        }
        break;

    default:
        g_assert_not_reached();
    }
    return a;
}

static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
{
    if (is_snan(a.cls) || is_snan(b.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    if (s->default_nan_mode) {
        return parts_default_nan(s);
    } else {
        if (pickNaN(a.cls, b.cls,
                    a.frac > b.frac ||
                    (a.frac == b.frac && a.sign < b.sign))) {
            a = b;
        }
        if (is_snan(a.cls)) {
            return parts_silence_nan(a, s);
        }
    }
    return a;
}

static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
                                  bool inf_zero, float_status *s)
{
    int which;

    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);

    if (s->default_nan_mode) {
        /* Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag.
         */
        which = 3;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    case 3:
        return parts_default_nan(s);
    default:
        g_assert_not_reached();
    }

    if (is_snan(a.cls)) {
        return parts_silence_nan(a, s);
    }
    return a;
}

/*
 * Returns the result of adding or subtracting the floating-point
 * values `a' and `b'. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */

static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
                                float_status *s)
{
    bool a_sign = a.sign;
    bool b_sign = b.sign ^ subtract;

    if (a_sign != b_sign) {
        /* Subtraction */

        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
                a.frac = a.frac - b.frac;
            } else {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.frac = b.frac - a.frac;
                a.exp = b.exp;
                a_sign ^= 1;
            }

            if (a.frac == 0) {
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
            } else {
                int shift = clz64(a.frac) - 1;
                a.frac = a.frac << shift;
                a.exp = a.exp - shift;
                a.sign = a_sign;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf) {
            if (b.cls == float_class_inf) {
                float_raise(float_flag_invalid, s);
                return parts_default_nan(s);
            }
            return a;
        }
        if (a.cls == float_class_zero && b.cls == float_class_zero) {
            a.sign = s->float_rounding_mode == float_round_down;
            return a;
        }
        if (a.cls == float_class_zero || b.cls == float_class_inf) {
            b.sign = a_sign ^ 1;
            return b;
        }
        if (b.cls == float_class_zero) {
            return a;
        }
    } else {
        /* Addition */
        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            if (a.exp > b.exp) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
            } else if (a.exp < b.exp) {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.exp = b.exp;
            }
            a.frac += b.frac;
            if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                shift64RightJamming(a.frac, 1, &a.frac);
                a.exp += 1;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf || b.cls == float_class_zero) {
            return a;
        }
        if (b.cls == float_class_inf || a.cls == float_class_zero) {
            b.sign = b_sign;
            return b;
        }
    }
    g_assert_not_reached();
}
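
/*
 * Worked example (editorial reading aid): adding 1.0 + 1.0 in decomposed
 * form, both fracs are DECOMPOSED_IMPLICIT_BIT (1ull << 62) with exp 0;
 * the sum sets DECOMPOSED_OVERFLOW_BIT (1ull << 63), so the fraction is
 * shifted right by one and exp becomes 1, i.e. 1.0 * 2^1 = 2.0.
 */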

/*
 * Returns the result of adding or subtracting the floating-point
 * values `a' and `b'. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */

float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
{
    FloatParts pa = float16_unpack_canonical(a, status);
    FloatParts pb = float16_unpack_canonical(b, status);
    FloatParts pr = addsub_floats(pa, pb, false, status);

    return float16_round_pack_canonical(pr, status);
}

float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
{
    FloatParts pa = float16_unpack_canonical(a, status);
    FloatParts pb = float16_unpack_canonical(b, status);
    FloatParts pr = addsub_floats(pa, pb, true, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
{
    FloatParts pa = float32_unpack_canonical(a, status);
    FloatParts pb = float32_unpack_canonical(b, status);
    FloatParts pr = addsub_floats(pa, pb, subtract, status);

    return float32_round_pack_canonical(pr, status);
}

static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, false, status);
}

static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, true, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
{
    FloatParts pa = float64_unpack_canonical(a, status);
    FloatParts pb = float64_unpack_canonical(b, status);
    FloatParts pr = addsub_floats(pa, pb, subtract, status);

    return float64_round_pack_canonical(pr, status);
}

static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, false, status);
}

static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, true, status);
}

static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}

static bool f32_addsub_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    }
    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}

static bool f64_addsub_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
    } else {
        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
    }
}

static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsub_post, NULL, NULL);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsub_post, NULL, NULL);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */

static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t hi, lo;
        int exp = a.exp + b.exp;

        mul64To128(a.frac, b.frac, &hi, &lo);
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
        if (lo & DECOMPOSED_OVERFLOW_BIT) {
            shift64RightJamming(lo, 1, &lo);
            exp += 1;
        }

        /* Re-use a */
        a.exp = exp;
        a.sign = sign;
        a.frac = lo;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* Inf * Zero == NaN */
    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
        (a.cls == float_class_zero && b.cls == float_class_inf)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Multiply by 0 or Inf */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    if (b.cls == float_class_inf || b.cls == float_class_zero) {
        b.sign = sign;
        return b;
    }
    g_assert_not_reached();
}
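
/*
 * Worked example (editorial reading aid): 1.0 * 1.0 multiplies two fracs
 * of 2^62, giving a 128-bit product of 2^124; shifting right by
 * DECOMPOSED_BINARY_POINT (62) returns lo to 2^62 with exp 0 + 0 = 0,
 * i.e. 1.0 again. The overflow check handles products in [2.0, 4.0).
 */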

float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts pa = float16_unpack_canonical(a, status);
    FloatParts pb = float16_unpack_canonical(b, status);
    FloatParts pr = mul_floats(pa, pb, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts pa = float32_unpack_canonical(a, status);
    FloatParts pb = float32_unpack_canonical(b, status);
    FloatParts pr = mul_floats(pa, pb, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts pa = float64_unpack_canonical(a, status);
    FloatParts pb = float64_unpack_canonical(b, status);
    FloatParts pr = mul_floats(pa, pb, status);

    return float64_round_pack_canonical(pr, status);
}

static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

static bool f32_mul_fast_test(union_float32 a, union_float32 b)
{
    return float32_is_zero(a.s) || float32_is_zero(b.s);
}

static bool f64_mul_fast_test(union_float64 a, union_float64 b)
{
    return float64_is_zero(a.s) || float64_is_zero(b.s);
}

static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
{
    bool signbit = float32_is_neg(a) ^ float32_is_neg(b);

    return float32_set_sign(float32_zero, signbit);
}

static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
{
    bool signbit = float64_is_neg(a) ^ float64_is_neg(b);

    return float64_set_sign(float64_zero, signbit);
}

float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on
 * NaNs.)
 */

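/*
 * For example (editorial reading aid; the flags are defined in
 * fpu/softfloat.h): with float_muladd_negate_product the computation is
 * (-(a * b)) + c, with float_muladd_negate_c it is (a * b) + (-c), and
 * with float_muladd_negate_result it is -((a * b) + c); each negation
 * applies to the exact intermediate value, before any rounding.
 */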
static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
                                int flags, float_status *s)
{
    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
                    ((1 << float_class_inf) | (1 << float_class_zero));
    bool p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
        return pick_nan_muladd(a, b, c, inf_zero, s);
    }

    if (inf_zero) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    if (a.cls == float_class_inf || b.cls == float_class_inf) {
        p_class = float_class_inf;
    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        if (p_class == float_class_inf && p_sign != c.sign) {
            s->float_exception_flags |= float_flag_invalid;
            return parts_default_nan(s);
        } else {
            a.cls = float_class_inf;
            a.sign = c.sign ^ sign_flip;
            return a;
        }
    }

    if (p_class == float_class_inf) {
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        if (c.cls == float_class_zero) {
            if (p_sign != c.sign) {
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
     * result.
     */
    mul64To128(a.frac, b.frac, &hi, &lo);
    /* binary point now at bit 124 */

    /* check for overflow */
    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
        shift128RightJamming(hi, lo, 1, &hi, &lo);
        p_exp += 1;
    }

    /* + add/sub */
    if (c.cls == float_class_zero) {
        /* move binary point back to 62 */
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
    } else {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo,
                                     DECOMPOSED_BINARY_POINT - exp_diff,
                                     &hi, &lo);
                lo += c.frac;
                p_exp = c.exp;
            } else {
                uint64_t c_hi, c_lo;
                /* shift c to the same binary point as the product (124) */
                c_hi = c.frac >> 2;
                c_lo = 0;
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                add128(hi, lo, c_hi, c_lo, &hi, &lo);
                /* move binary point back to 62 */
                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
            }

            if (lo & DECOMPOSED_OVERFLOW_BIT) {
                shift64RightJamming(lo, 1, &lo);
                p_exp += 1;
            }

        } else {
            /* Subtraction */
            uint64_t c_hi, c_lo;
            /* make C binary point match product at bit 124 */
            c_hi = c.frac >> 2;
            c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent. However since we're
                   shifting, we might as well put the binary point back
                   at 62 where we really want it. Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits. */
                shift -= 1;
                if (shift >= 64) {
                    lo = lo << (shift - 64);
                } else {
                    hi = (hi << shift) | (lo >> (64 - shift));
                    lo = hi | ((lo << shift) != 0);
                }
                p_exp -= shift - 2;
            }
        }
    }

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = lo;

    return a;
}

float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts pa = float16_unpack_canonical(a, status);
    FloatParts pb = float16_unpack_canonical(b, status);
    FloatParts pc = float16_unpack_canonical(c, status);
    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts pa = float32_unpack_canonical(a, status);
    FloatParts pb = float32_unpack_canonical(b, status);
    FloatParts pc = float32_unpack_canonical(c, status);
    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts pa = float64_unpack_canonical(a, status);
    FloatParts pb = float64_unpack_canonical(b, status);
    FloatParts pc = float64_unpack_canonical(c, status);
    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}

static bool force_soft_fma;

float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}

float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */

static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward. If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set. Since we know that
         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
         * by one (more), and the remainder must be shifted right by one.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);

        /*
         * Set lsb if there is a remainder, to set inexact.
         * As mentioned above, to find the actual value of the remainder we
         * would need to shift right, but (1) we are only concerned about
         * non-zero-ness, and (2) the remainder will always be even because
         * both inputs to the division primitive are even.
         */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        s->float_exception_flags |= float_flag_divbyzero;
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    g_assert_not_reached();
}
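
/*
 * Worked example (editorial reading aid): for 1.0 / 1.0 both fracs are
 * 2^62, so the else branch shifts a.frac left by 63 into n1:n0 and
 * divides by b.frac << 1 = 2^63, giving q = 2^62 with no remainder:
 * frac = 2^62, exp = 0, i.e. exactly 1.0 with no inexact flag.
 */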
1777
1778 float16 float16_div(float16 a, float16 b, float_status *status)
1779 {
1780 FloatParts pa = float16_unpack_canonical(a, status);
1781 FloatParts pb = float16_unpack_canonical(b, status);
1782 FloatParts pr = div_floats(pa, pb, status);
1783
1784 return float16_round_pack_canonical(pr, status);
1785 }
1786
1787 static float32 QEMU_SOFTFLOAT_ATTR
1788 soft_f32_div(float32 a, float32 b, float_status *status)
1789 {
1790 FloatParts pa = float32_unpack_canonical(a, status);
1791 FloatParts pb = float32_unpack_canonical(b, status);
1792 FloatParts pr = div_floats(pa, pb, status);
1793
1794 return float32_round_pack_canonical(pr, status);
1795 }
1796
1797 static float64 QEMU_SOFTFLOAT_ATTR
1798 soft_f64_div(float64 a, float64 b, float_status *status)
1799 {
1800 FloatParts pa = float64_unpack_canonical(a, status);
1801 FloatParts pb = float64_unpack_canonical(b, status);
1802 FloatParts pr = div_floats(pa, pb, status);
1803
1804 return float64_round_pack_canonical(pr, status);
1805 }
1806
1807 static float hard_f32_div(float a, float b)
1808 {
1809 return a / b;
1810 }
1811
1812 static double hard_f64_div(double a, double b)
1813 {
1814 return a / b;
1815 }
1816
1817 static bool f32_div_pre(union_float32 a, union_float32 b)
1818 {
1819 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1820 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1821 fpclassify(b.h) == FP_NORMAL;
1822 }
1823 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1824 }
1825
1826 static bool f64_div_pre(union_float64 a, union_float64 b)
1827 {
1828 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1829 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1830 fpclassify(b.h) == FP_NORMAL;
1831 }
1832 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1833 }
1834
1835 static bool f32_div_post(union_float32 a, union_float32 b)
1836 {
1837 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1838 return fpclassify(a.h) != FP_ZERO;
1839 }
1840 return !float32_is_zero(a.s);
1841 }
1842
1843 static bool f64_div_post(union_float64 a, union_float64 b)
1844 {
1845 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1846 return fpclassify(a.h) != FP_ZERO;
1847 }
1848 return !float64_is_zero(a.s);
1849 }
1850
1851 float32 QEMU_FLATTEN
1852 float32_div(float32 a, float32 b, float_status *s)
1853 {
1854 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1855 f32_div_pre, f32_div_post, NULL, NULL);
1856 }
1857
1858 float64 QEMU_FLATTEN
1859 float64_div(float64 a, float64 b, float_status *s)
1860 {
1861 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1862 f64_div_pre, f64_div_post, NULL, NULL);
1863 }
1864
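/*
 * Editor's usage sketch (assumed driver, not part of the original
 * file): callers supply a float_status so the rounding mode and the
 * accrued IEEE flags stay per-caller.  Zero-initialization selects
 * float_round_nearest_even (enum value 0) with no flags set.
 */
static inline float64 example_div_once(float64 x, float64 y)
{
    float_status st = { 0 };
    float64 r = float64_div(x, y, &st);

    /* st.float_exception_flags now records divbyzero/inexact/etc. */
    return r;
}
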
1865 /*
1866 * Float to Float conversions
1867 *
1868 * Returns the result of converting one float format to another. The
1869 * conversion is performed according to the IEC/IEEE Standard for
1870 * Binary Floating-Point Arithmetic.
1871 *
1872 * The float_to_float helper only needs to take care of raising
1873 * invalid exceptions and handling NaN inputs.
1874 */
1875
1876 static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1877 float_status *s)
1878 {
1879 if (dstf->arm_althp) {
1880 switch (a.cls) {
1881 case float_class_qnan:
1882 case float_class_snan:
1883 /* There is no NaN in the destination format. Raise Invalid
1884 * and return a zero with the sign of the input NaN.
1885 */
1886 s->float_exception_flags |= float_flag_invalid;
1887 a.cls = float_class_zero;
1888 a.frac = 0;
1889 a.exp = 0;
1890 break;
1891
1892 case float_class_inf:
1893 /* There is no Inf in the destination format. Raise Invalid
1894 * and return the maximum normal with the correct sign.
1895 */
1896 s->float_exception_flags |= float_flag_invalid;
1897 a.cls = float_class_normal;
1898 a.exp = dstf->exp_max;
1899 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1900 break;
1901
1902 default:
1903 break;
1904 }
1905 } else if (is_nan(a.cls)) {
1906 if (is_snan(a.cls)) {
1907 s->float_exception_flags |= float_flag_invalid;
1908 a = parts_silence_nan(a, s);
1909 }
1910 if (s->default_nan_mode) {
1911 return parts_default_nan(s);
1912 }
1913 }
1914 return a;
1915 }
1916
1917 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1918 {
1919 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1920 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1921 FloatParts pr = float_to_float(p, &float32_params, s);
1922 return float32_round_pack_canonical(pr, s);
1923 }
1924
1925 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1926 {
1927 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1928 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1929 FloatParts pr = float_to_float(p, &float64_params, s);
1930 return float64_round_pack_canonical(pr, s);
1931 }
1932
1933 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1934 {
1935 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1936 FloatParts p = float32_unpack_canonical(a, s);
1937 FloatParts pr = float_to_float(p, fmt16, s);
1938 return float16a_round_pack_canonical(pr, s, fmt16);
1939 }
1940
1941 float64 float32_to_float64(float32 a, float_status *s)
1942 {
1943 FloatParts p = float32_unpack_canonical(a, s);
1944 FloatParts pr = float_to_float(p, &float64_params, s);
1945 return float64_round_pack_canonical(pr, s);
1946 }
1947
1948 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1949 {
1950 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1951 FloatParts p = float64_unpack_canonical(a, s);
1952 FloatParts pr = float_to_float(p, fmt16, s);
1953 return float16a_round_pack_canonical(pr, s, fmt16);
1954 }
1955
1956 float32 float64_to_float32(float64 a, float_status *s)
1957 {
1958 FloatParts p = float64_unpack_canonical(a, s);
1959 FloatParts pr = float_to_float(p, &float32_params, s);
1960 return float32_round_pack_canonical(pr, s);
1961 }
1962
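/*
 * Editor's sketch (assumed driver): the 'ieee' flag selects between
 * IEEE half precision and the Arm alternative half precision (AHP)
 * layout handled in float_to_float above.  AHP has no Inf or NaN, so
 * e.g. a float32 infinity saturates to the largest AHP normal and
 * raises the invalid flag.
 */
static inline float16 example_to_ahp_half(float32 x, float_status *st)
{
    return float32_to_float16(x, false /* ieee */, st);
}
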
1963 /*
1964 * Rounds the floating-point value `a' to an integer, and returns the
1965 * result as a floating-point value. The operation is performed
1966 * according to the IEC/IEEE Standard for Binary Floating-Point
1967 * Arithmetic.
1968 */
1969
1970 static FloatParts round_to_int(FloatParts a, int rmode,
1971 int scale, float_status *s)
1972 {
1973 switch (a.cls) {
1974 case float_class_qnan:
1975 case float_class_snan:
1976 return return_nan(a, s);
1977
1978 case float_class_zero:
1979 case float_class_inf:
1980 /* already "integral" */
1981 break;
1982
1983 case float_class_normal:
1984 scale = MIN(MAX(scale, -0x10000), 0x10000);
1985 a.exp += scale;
1986
1987 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1988 /* already integral */
1989 break;
1990 }
1991 if (a.exp < 0) {
1992 bool one;
1993 /* all fractional */
1994 s->float_exception_flags |= float_flag_inexact;
1995 switch (rmode) {
1996 case float_round_nearest_even:
1997 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1998 break;
1999 case float_round_ties_away:
2000 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2001 break;
2002 case float_round_to_zero:
2003 one = false;
2004 break;
2005 case float_round_up:
2006 one = !a.sign;
2007 break;
2008 case float_round_down:
2009 one = a.sign;
2010 break;
2011 case float_round_to_odd:
2012 one = true;
2013 break;
2014 default:
2015 g_assert_not_reached();
2016 }
2017
2018 if (one) {
2019 a.frac = DECOMPOSED_IMPLICIT_BIT;
2020 a.exp = 0;
2021 } else {
2022 a.cls = float_class_zero;
2023 }
2024 } else {
2025 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2026 uint64_t frac_lsbm1 = frac_lsb >> 1;
2027 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2028 uint64_t rnd_mask = rnd_even_mask >> 1;
2029 uint64_t inc;
2030
2031 switch (rmode) {
2032 case float_round_nearest_even:
2033 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2034 break;
2035 case float_round_ties_away:
2036 inc = frac_lsbm1;
2037 break;
2038 case float_round_to_zero:
2039 inc = 0;
2040 break;
2041 case float_round_up:
2042 inc = a.sign ? 0 : rnd_mask;
2043 break;
2044 case float_round_down:
2045 inc = a.sign ? rnd_mask : 0;
2046 break;
2047 case float_round_to_odd:
2048 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2049 break;
2050 default:
2051 g_assert_not_reached();
2052 }
2053
2054 if (a.frac & rnd_mask) {
2055 s->float_exception_flags |= float_flag_inexact;
2056 a.frac += inc;
2057 a.frac &= ~rnd_mask;
2058 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2059 a.frac >>= 1;
2060 a.exp++;
2061 }
2062 }
2063 }
2064 break;
2065 default:
2066 g_assert_not_reached();
2067 }
2068 return a;
2069 }
2070
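/*
 * Editor's sketch of the mask arithmetic above in isolation (assumed
 * helper, not part of the original file).  frac_lsb is the weight of
 * one integer step, frac_lsbm1 the half step; a tie whose integer lsb
 * is already even takes no increment.  The carry into the implicit
 * bit, which the real code renormalizes, is omitted here.
 */
static inline uint64_t round_frac_nearest_even_sketch(uint64_t frac, int exp)
{
    uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> exp;
    uint64_t frac_lsbm1 = frac_lsb >> 1;
    uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;

    if ((frac & rnd_even_mask) != frac_lsbm1) {
        frac += frac_lsbm1;                 /* round to nearest */
    }
    return frac & ~(rnd_even_mask >> 1);    /* drop the fraction bits */
}
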
2071 float16 float16_round_to_int(float16 a, float_status *s)
2072 {
2073 FloatParts pa = float16_unpack_canonical(a, s);
2074 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2075 return float16_round_pack_canonical(pr, s);
2076 }
2077
2078 float32 float32_round_to_int(float32 a, float_status *s)
2079 {
2080 FloatParts pa = float32_unpack_canonical(a, s);
2081 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2082 return float32_round_pack_canonical(pr, s);
2083 }
2084
2085 float64 float64_round_to_int(float64 a, float_status *s)
2086 {
2087 FloatParts pa = float64_unpack_canonical(a, s);
2088 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2089 return float64_round_pack_canonical(pr, s);
2090 }
2091
2092 /*
2093 * Returns the result of converting the floating-point value `a' to
2094 * the two's complement integer format. The conversion is performed
2095 * according to the IEC/IEEE Standard for Binary Floating-Point
2096 * Arithmetic---which means in particular that the conversion is
2097 * rounded according to the current rounding mode. If `a' is a NaN,
2098 * the largest positive integer is returned. Otherwise, if the
2099 * conversion overflows, the largest integer with the same sign as `a'
2100 * is returned.
2101 */
2102
2103 static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
2104 int64_t min, int64_t max,
2105 float_status *s)
2106 {
2107 uint64_t r;
2108 int orig_flags = get_float_exception_flags(s);
2109 FloatParts p = round_to_int(in, rmode, scale, s);
2110
2111 switch (p.cls) {
2112 case float_class_snan:
2113 case float_class_qnan:
2114 s->float_exception_flags = orig_flags | float_flag_invalid;
2115 return max;
2116 case float_class_inf:
2117 s->float_exception_flags = orig_flags | float_flag_invalid;
2118 return p.sign ? min : max;
2119 case float_class_zero:
2120 return 0;
2121 case float_class_normal:
2122 if (p.exp < DECOMPOSED_BINARY_POINT) {
2123 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2124 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2125 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2126 } else {
2127 r = UINT64_MAX;
2128 }
2129 if (p.sign) {
2130 if (r <= -(uint64_t) min) {
2131 return -r;
2132 } else {
2133 s->float_exception_flags = orig_flags | float_flag_invalid;
2134 return min;
2135 }
2136 } else {
2137 if (r <= max) {
2138 return r;
2139 } else {
2140 s->float_exception_flags = orig_flags | float_flag_invalid;
2141 return max;
2142 }
2143 }
2144 default:
2145 g_assert_not_reached();
2146 }
2147 }
2148
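/*
 * Editor's example of the saturation contract above (assumed driver,
 * not part of the original file): a float64 holding 2^31, one past
 * INT32_MAX, converts to INT32_MAX with the invalid flag accrued.
 */
static inline bool example_int32_saturates(void)
{
    float_status st = { 0 };
    float64 big = int64_to_float64((int64_t)INT32_MAX + 1, &st);

    return float64_to_int32(big, &st) == INT32_MAX &&
           (st.float_exception_flags & float_flag_invalid);
}
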
2149 int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2150 float_status *s)
2151 {
2152 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2153 rmode, scale, INT16_MIN, INT16_MAX, s);
2154 }
2155
2156 int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2157 float_status *s)
2158 {
2159 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2160 rmode, scale, INT32_MIN, INT32_MAX, s);
2161 }
2162
2163 int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2164 float_status *s)
2165 {
2166 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2167 rmode, scale, INT64_MIN, INT64_MAX, s);
2168 }
2169
2170 int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2171 float_status *s)
2172 {
2173 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2174 rmode, scale, INT16_MIN, INT16_MAX, s);
2175 }
2176
2177 int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2178 float_status *s)
2179 {
2180 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2181 rmode, scale, INT32_MIN, INT32_MAX, s);
2182 }
2183
2184 int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2185 float_status *s)
2186 {
2187 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2188 rmode, scale, INT64_MIN, INT64_MAX, s);
2189 }
2190
2191 int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2192 float_status *s)
2193 {
2194 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2195 rmode, scale, INT16_MIN, INT16_MAX, s);
2196 }
2197
2198 int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2199 float_status *s)
2200 {
2201 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2202 rmode, scale, INT32_MIN, INT32_MAX, s);
2203 }
2204
2205 int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2206 float_status *s)
2207 {
2208 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2209 rmode, scale, INT64_MIN, INT64_MAX, s);
2210 }
2211
2212 int16_t float16_to_int16(float16 a, float_status *s)
2213 {
2214 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2215 }
2216
2217 int32_t float16_to_int32(float16 a, float_status *s)
2218 {
2219 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2220 }
2221
2222 int64_t float16_to_int64(float16 a, float_status *s)
2223 {
2224 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2225 }
2226
2227 int16_t float32_to_int16(float32 a, float_status *s)
2228 {
2229 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2230 }
2231
2232 int32_t float32_to_int32(float32 a, float_status *s)
2233 {
2234 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2235 }
2236
2237 int64_t float32_to_int64(float32 a, float_status *s)
2238 {
2239 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2240 }
2241
2242 int16_t float64_to_int16(float64 a, float_status *s)
2243 {
2244 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2245 }
2246
2247 int32_t float64_to_int32(float64 a, float_status *s)
2248 {
2249 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2250 }
2251
2252 int64_t float64_to_int64(float64 a, float_status *s)
2253 {
2254 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2255 }
2256
2257 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2258 {
2259 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2260 }
2261
2262 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2263 {
2264 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2265 }
2266
2267 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2268 {
2269 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2270 }
2271
2272 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2273 {
2274 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2275 }
2276
2277 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2278 {
2279 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2280 }
2281
2282 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2283 {
2284 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2285 }
2286
2287 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2288 {
2289 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2290 }
2291
2292 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2293 {
2294 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2295 }
2296
2297 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2298 {
2299 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2300 }
2301
2302 /*
2303 * Returns the result of converting the floating-point value `a' to
2304 * the unsigned integer format. The conversion is performed according
2305 * to the IEC/IEEE Standard for Binary Floating-Point
2306 * Arithmetic---which means in particular that the conversion is
2307 * rounded according to the current rounding mode. If `a' is a NaN,
2308 * the largest unsigned integer is returned. Otherwise, if the
2309 * conversion overflows, the largest unsigned integer is returned. If
2310 * `a' is negative, the result is rounded and zero is returned;
2311 * negative values that round to zero raise only the inexact flag,
2312 * while those that do not round to zero raise the invalid flag.
2313 */
2314
2315 static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2316 uint64_t max, float_status *s)
2317 {
2318 int orig_flags = get_float_exception_flags(s);
2319 FloatParts p = round_to_int(in, rmode, scale, s);
2320 uint64_t r;
2321
2322 switch (p.cls) {
2323 case float_class_snan:
2324 case float_class_qnan:
2325 s->float_exception_flags = orig_flags | float_flag_invalid;
2326 return max;
2327 case float_class_inf:
2328 s->float_exception_flags = orig_flags | float_flag_invalid;
2329 return p.sign ? 0 : max;
2330 case float_class_zero:
2331 return 0;
2332 case float_class_normal:
2333 if (p.sign) {
2334 s->float_exception_flags = orig_flags | float_flag_invalid;
2335 return 0;
2336 }
2337
2338 if (p.exp < DECOMPOSED_BINARY_POINT) {
2339 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2340 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2341 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2342 } else {
2343 s->float_exception_flags = orig_flags | float_flag_invalid;
2344 return max;
2345 }
2346
2347 /* For uint64 this will never trip, but if p.exp is too large
2348 * to shift a decomposed fraction we shall have exited via the
2349 * 3rd leg above.
2350 */
2351 if (r > max) {
2352 s->float_exception_flags = orig_flags | float_flag_invalid;
2353 return max;
2354 }
2355 return r;
2356 default:
2357 g_assert_not_reached();
2358 }
2359 }
2360
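/*
 * Editor's example of the negative-input contract above (assumed
 * driver; 0xBFD0000000000000 is the IEEE bit pattern of -0.25):
 * -0.25 rounds to zero under nearest-even, so only inexact accrues,
 * whereas e.g. -1.5 cannot round to zero and raises invalid instead.
 */
static inline bool example_uint_negative(void)
{
    float_status st = { 0 };
    float64 minus_quarter = make_float64(0xBFD0000000000000ULL);

    return float64_to_uint32(minus_quarter, &st) == 0 &&
           (st.float_exception_flags & float_flag_inexact);
}
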
2361 uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2362 float_status *s)
2363 {
2364 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2365 rmode, scale, UINT16_MAX, s);
2366 }
2367
2368 uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2369 float_status *s)
2370 {
2371 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2372 rmode, scale, UINT32_MAX, s);
2373 }
2374
2375 uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2376 float_status *s)
2377 {
2378 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2379 rmode, scale, UINT64_MAX, s);
2380 }
2381
2382 uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2383 float_status *s)
2384 {
2385 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2386 rmode, scale, UINT16_MAX, s);
2387 }
2388
2389 uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2390 float_status *s)
2391 {
2392 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2393 rmode, scale, UINT32_MAX, s);
2394 }
2395
2396 uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2397 float_status *s)
2398 {
2399 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2400 rmode, scale, UINT64_MAX, s);
2401 }
2402
2403 uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2404 float_status *s)
2405 {
2406 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2407 rmode, scale, UINT16_MAX, s);
2408 }
2409
2410 uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2411 float_status *s)
2412 {
2413 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2414 rmode, scale, UINT32_MAX, s);
2415 }
2416
2417 uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2418 float_status *s)
2419 {
2420 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2421 rmode, scale, UINT64_MAX, s);
2422 }
2423
2424 uint16_t float16_to_uint16(float16 a, float_status *s)
2425 {
2426 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2427 }
2428
2429 uint32_t float16_to_uint32(float16 a, float_status *s)
2430 {
2431 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2432 }
2433
2434 uint64_t float16_to_uint64(float16 a, float_status *s)
2435 {
2436 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2437 }
2438
2439 uint16_t float32_to_uint16(float32 a, float_status *s)
2440 {
2441 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2442 }
2443
2444 uint32_t float32_to_uint32(float32 a, float_status *s)
2445 {
2446 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2447 }
2448
2449 uint64_t float32_to_uint64(float32 a, float_status *s)
2450 {
2451 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2452 }
2453
2454 uint16_t float64_to_uint16(float64 a, float_status *s)
2455 {
2456 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2457 }
2458
2459 uint32_t float64_to_uint32(float64 a, float_status *s)
2460 {
2461 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2462 }
2463
2464 uint64_t float64_to_uint64(float64 a, float_status *s)
2465 {
2466 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2467 }
2468
2469 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2470 {
2471 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2472 }
2473
2474 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2475 {
2476 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2477 }
2478
2479 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2480 {
2481 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2482 }
2483
2484 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2485 {
2486 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2487 }
2488
2489 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2490 {
2491 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2492 }
2493
2494 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2495 {
2496 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2497 }
2498
2499 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2500 {
2501 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2502 }
2503
2504 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2505 {
2506 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2507 }
2508
2509 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2510 {
2511 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2512 }
2513
2514 /*
2515 * Integer to float conversions
2516 *
2517 * Returns the result of converting the two's complement integer `a'
2518 * to the floating-point format. The conversion is performed according
2519 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2520 */
2521
2522 static FloatParts int_to_float(int64_t a, int scale, float_status *status)
2523 {
2524 FloatParts r = { .sign = false };
2525
2526 if (a == 0) {
2527 r.cls = float_class_zero;
2528 } else {
2529 uint64_t f = a;
2530 int shift;
2531
2532 r.cls = float_class_normal;
2533 if (a < 0) {
2534 f = -f;
2535 r.sign = true;
2536 }
2537 shift = clz64(f) - 1;
2538 scale = MIN(MAX(scale, -0x10000), 0x10000);
2539
2540 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2541 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
2542 }
2543
2544 return r;
2545 }
2546
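/*
 * Editor's worked example of the normalization above (values assumed,
 * with DECOMPOSED_BINARY_POINT == 62): int_to_float(6, 0, s) sees
 * f = 0b110, clz64(f) = 61, shift = 60, so r.exp = 62 - 60 = 2 and
 * r.frac = 6 << 60, i.e. 1.5 * 2^62; decoded, that is 1.5 * 2^2 = 6.0.
 * The shift < 0 case only triggers for INT64_MIN, whose magnitude
 * already fills bit 63.
 */
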
2547 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2548 {
2549 FloatParts pa = int_to_float(a, scale, status);
2550 return float16_round_pack_canonical(pa, status);
2551 }
2552
2553 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2554 {
2555 return int64_to_float16_scalbn(a, scale, status);
2556 }
2557
2558 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2559 {
2560 return int64_to_float16_scalbn(a, scale, status);
2561 }
2562
2563 float16 int64_to_float16(int64_t a, float_status *status)
2564 {
2565 return int64_to_float16_scalbn(a, 0, status);
2566 }
2567
2568 float16 int32_to_float16(int32_t a, float_status *status)
2569 {
2570 return int64_to_float16_scalbn(a, 0, status);
2571 }
2572
2573 float16 int16_to_float16(int16_t a, float_status *status)
2574 {
2575 return int64_to_float16_scalbn(a, 0, status);
2576 }
2577
2578 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
2579 {
2580 FloatParts pa = int_to_float(a, scale, status);
2581 return float32_round_pack_canonical(pa, status);
2582 }
2583
2584 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2585 {
2586 return int64_to_float32_scalbn(a, scale, status);
2587 }
2588
2589 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2590 {
2591 return int64_to_float32_scalbn(a, scale, status);
2592 }
2593
2594 float32 int64_to_float32(int64_t a, float_status *status)
2595 {
2596 return int64_to_float32_scalbn(a, 0, status);
2597 }
2598
2599 float32 int32_to_float32(int32_t a, float_status *status)
2600 {
2601 return int64_to_float32_scalbn(a, 0, status);
2602 }
2603
2604 float32 int16_to_float32(int16_t a, float_status *status)
2605 {
2606 return int64_to_float32_scalbn(a, 0, status);
2607 }
2608
2609 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
2610 {
2611 FloatParts pa = int_to_float(a, scale, status);
2612 return float64_round_pack_canonical(pa, status);
2613 }
2614
2615 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2616 {
2617 return int64_to_float64_scalbn(a, scale, status);
2618 }
2619
2620 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2621 {
2622 return int64_to_float64_scalbn(a, scale, status);
2623 }
2624
2625 float64 int64_to_float64(int64_t a, float_status *status)
2626 {
2627 return int64_to_float64_scalbn(a, 0, status);
2628 }
2629
2630 float64 int32_to_float64(int32_t a, float_status *status)
2631 {
2632 return int64_to_float64_scalbn(a, 0, status);
2633 }
2634
2635 float64 int16_to_float64(int16_t a, float_status *status)
2636 {
2637 return int64_to_float64_scalbn(a, 0, status);
2638 }
2639
2640
2641 /*
2642 * Unsigned Integer to float conversions
2643 *
2644 * Returns the result of converting the unsigned integer `a' to the
2645 * floating-point format. The conversion is performed according to the
2646 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2647 */
2648
2649 static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
2650 {
2651 FloatParts r = { .sign = false };
2652
2653 if (a == 0) {
2654 r.cls = float_class_zero;
2655 } else {
2656 scale = MIN(MAX(scale, -0x10000), 0x10000);
2657 r.cls = float_class_normal;
2658 if ((int64_t)a < 0) {
2659 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2660 shift64RightJamming(a, 1, &a);
2661 r.frac = a;
2662 } else {
2663 int shift = clz64(a) - 1;
2664 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2665 r.frac = a << shift;
2666 }
2667 }
2668
2669 return r;
2670 }
2671
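/*
 * Editor's note on the (int64_t)a < 0 branch above (worked values
 * assumed): when bit 63 is set, the fraction cannot be normalized by
 * a left shift, so the exponent becomes DECOMPOSED_BINARY_POINT + 1
 * and the value is shifted right once.  shift64RightJamming ORs the
 * discarded bit back into bit 0, e.g. a = 2^63 + 1 yields
 * frac = 2^62 | 1, keeping the result detectably inexact.
 */
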
2672 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
2673 {
2674 FloatParts pa = uint_to_float(a, scale, status);
2675 return float16_round_pack_canonical(pa, status);
2676 }
2677
2678 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2679 {
2680 return uint64_to_float16_scalbn(a, scale, status);
2681 }
2682
2683 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2684 {
2685 return uint64_to_float16_scalbn(a, scale, status);
2686 }
2687
2688 float16 uint64_to_float16(uint64_t a, float_status *status)
2689 {
2690 return uint64_to_float16_scalbn(a, 0, status);
2691 }
2692
2693 float16 uint32_to_float16(uint32_t a, float_status *status)
2694 {
2695 return uint64_to_float16_scalbn(a, 0, status);
2696 }
2697
2698 float16 uint16_to_float16(uint16_t a, float_status *status)
2699 {
2700 return uint64_to_float16_scalbn(a, 0, status);
2701 }
2702
2703 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
2704 {
2705 FloatParts pa = uint_to_float(a, scale, status);
2706 return float32_round_pack_canonical(pa, status);
2707 }
2708
2709 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2710 {
2711 return uint64_to_float32_scalbn(a, scale, status);
2712 }
2713
2714 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2715 {
2716 return uint64_to_float32_scalbn(a, scale, status);
2717 }
2718
2719 float32 uint64_to_float32(uint64_t a, float_status *status)
2720 {
2721 return uint64_to_float32_scalbn(a, 0, status);
2722 }
2723
2724 float32 uint32_to_float32(uint32_t a, float_status *status)
2725 {
2726 return uint64_to_float32_scalbn(a, 0, status);
2727 }
2728
2729 float32 uint16_to_float32(uint16_t a, float_status *status)
2730 {
2731 return uint64_to_float32_scalbn(a, 0, status);
2732 }
2733
2734 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
2735 {
2736 FloatParts pa = uint_to_float(a, scale, status);
2737 return float64_round_pack_canonical(pa, status);
2738 }
2739
2740 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2741 {
2742 return uint64_to_float64_scalbn(a, scale, status);
2743 }
2744
2745 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2746 {
2747 return uint64_to_float64_scalbn(a, scale, status);
2748 }
2749
2750 float64 uint64_to_float64(uint64_t a, float_status *status)
2751 {
2752 return uint64_to_float64_scalbn(a, 0, status);
2753 }
2754
2755 float64 uint32_to_float64(uint32_t a, float_status *status)
2756 {
2757 return uint64_to_float64_scalbn(a, 0, status);
2758 }
2759
2760 float64 uint16_to_float64(uint16_t a, float_status *status)
2761 {
2762 return uint64_to_float64_scalbn(a, 0, status);
2763 }
2764
2765 /* Float Min/Max */
2766 /* min() and max() functions. These can't be implemented as
2767 * 'compare and pick one input' because that would mishandle
2768 * NaNs and +0 vs -0.
2769 *
2770 * minnum() and maxnum() functions. These are similar to the min()
2771 * and max() functions but if one of the arguments is a QNaN and
2772 * the other is numerical then the numerical argument is returned.
2773 * SNaNs will get quietened before being returned.
2774 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
2775 * and maxNum() operations. min() and max() are the typical min/max
2776 * semantics provided by many CPUs which predate that specification.
2777 *
2778 * minnummag() and maxnummag() functions correspond to minNumMag()
2779 * and maxNumMag() from IEEE 754-2008.
2780 */
2781 static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2782 bool ieee, bool ismag, float_status *s)
2783 {
2784 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2785 if (ieee) {
2786 /* Takes two floating-point values `a' and `b', one of
2787 * which is a NaN, and returns the appropriate NaN
2788 * result. If either `a' or `b' is a signaling NaN,
2789 * the invalid exception is raised.
2790 */
2791 if (is_snan(a.cls) || is_snan(b.cls)) {
2792 return pick_nan(a, b, s);
2793 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2794 return b;
2795 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2796 return a;
2797 }
2798 }
2799 return pick_nan(a, b, s);
2800 } else {
2801 int a_exp, b_exp;
2802
2803 switch (a.cls) {
2804 case float_class_normal:
2805 a_exp = a.exp;
2806 break;
2807 case float_class_inf:
2808 a_exp = INT_MAX;
2809 break;
2810 case float_class_zero:
2811 a_exp = INT_MIN;
2812 break;
2813 default:
2814 g_assert_not_reached();
2815 break;
2816 }
2817 switch (b.cls) {
2818 case float_class_normal:
2819 b_exp = b.exp;
2820 break;
2821 case float_class_inf:
2822 b_exp = INT_MAX;
2823 break;
2824 case float_class_zero:
2825 b_exp = INT_MIN;
2826 break;
2827 default:
2828 g_assert_not_reached();
2829 break;
2830 }
2831
2832 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2833 bool a_less = a_exp < b_exp;
2834 if (a_exp == b_exp) {
2835 a_less = a.frac < b.frac;
2836 }
2837 return a_less ^ ismin ? b : a;
2838 }
2839
2840 if (a.sign == b.sign) {
2841 bool a_less = a_exp < b_exp;
2842 if (a_exp == b_exp) {
2843 a_less = a.frac < b.frac;
2844 }
2845 return a.sign ^ a_less ^ ismin ? b : a;
2846 } else {
2847 return a.sign ^ ismin ? b : a;
2848 }
2849 }
2850 }
2851
2852 #define MINMAX(sz, name, ismin, isieee, ismag) \
2853 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2854 float_status *s) \
2855 { \
2856 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2857 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2858 FloatParts pr = minmax_floats(pa, pb, ismin, isieee, ismag, s); \
2859 \
2860 return float ## sz ## _round_pack_canonical(pr, s); \
2861 }
2862
2863 MINMAX(16, min, true, false, false)
2864 MINMAX(16, minnum, true, true, false)
2865 MINMAX(16, minnummag, true, true, true)
2866 MINMAX(16, max, false, false, false)
2867 MINMAX(16, maxnum, false, true, false)
2868 MINMAX(16, maxnummag, false, true, true)
2869
2870 MINMAX(32, min, true, false, false)
2871 MINMAX(32, minnum, true, true, false)
2872 MINMAX(32, minnummag, true, true, true)
2873 MINMAX(32, max, false, false, false)
2874 MINMAX(32, maxnum, false, true, false)
2875 MINMAX(32, maxnummag, false, true, true)
2876
2877 MINMAX(64, min, true, false, false)
2878 MINMAX(64, minnum, true, true, false)
2879 MINMAX(64, minnummag, true, true, true)
2880 MINMAX(64, max, false, false, false)
2881 MINMAX(64, maxnum, false, true, false)
2882 MINMAX(64, maxnummag, false, true, true)
2883
2884 #undef MINMAX
2885
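/*
 * Editor's example of the minnum() vs min() split described above
 * (assumed driver, not part of the original file): with one QNaN
 * operand, minnum() returns the numerical operand and raises nothing,
 * while min() resolves the NaN through pick_nan().
 */
static inline float32 example_minnum_qnan(float32 num, float_status *st)
{
    return float32_minnum(float32_default_nan(st), num, st);
}
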
2886 /* Floating point compare */
2887 static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2888 float_status *s)
2889 {
2890 if (is_nan(a.cls) || is_nan(b.cls)) {
2891 if (!is_quiet ||
2892 a.cls == float_class_snan ||
2893 b.cls == float_class_snan) {
2894 s->float_exception_flags |= float_flag_invalid;
2895 }
2896 return float_relation_unordered;
2897 }
2898
2899 if (a.cls == float_class_zero) {
2900 if (b.cls == float_class_zero) {
2901 return float_relation_equal;
2902 }
2903 return b.sign ? float_relation_greater : float_relation_less;
2904 } else if (b.cls == float_class_zero) {
2905 return a.sign ? float_relation_less : float_relation_greater;
2906 }
2907
2908 /* The only really important thing about infinity is its sign. If
2909 * both are infinities the sign marks the smallest of the two.
2910 */
2911 if (a.cls == float_class_inf) {
2912 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2913 return float_relation_equal;
2914 }
2915 return a.sign ? float_relation_less : float_relation_greater;
2916 } else if (b.cls == float_class_inf) {
2917 return b.sign ? float_relation_greater : float_relation_less;
2918 }
2919
2920 if (a.sign != b.sign) {
2921 return a.sign ? float_relation_less : float_relation_greater;
2922 }
2923
2924 if (a.exp == b.exp) {
2925 if (a.frac == b.frac) {
2926 return float_relation_equal;
2927 }
2928 if (a.sign) {
2929 return a.frac > b.frac ?
2930 float_relation_less : float_relation_greater;
2931 } else {
2932 return a.frac > b.frac ?
2933 float_relation_greater : float_relation_less;
2934 }
2935 } else {
2936 if (a.sign) {
2937 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2938 } else {
2939 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2940 }
2941 }
2942 }
2943
2944 #define COMPARE(name, attr, sz) \
2945 static int attr \
2946 name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
2947 { \
2948 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2949 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2950 return compare_floats(pa, pb, is_quiet, s); \
2951 }
2952
2953 COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2954 COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2955 COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
2956
2957 #undef COMPARE
2958
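/*
 * Editor's sketch of the ordering rules above (assumed driver, not
 * part of the original file): zeroes compare equal regardless of
 * sign, so -0.0 against +0.0 yields float_relation_equal, not less.
 */
static inline bool example_signed_zero_equal(float_status *st)
{
    return float64_compare(float64_chs(float64_zero), float64_zero, st)
           == float_relation_equal;
}
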
2959 int float16_compare(float16 a, float16 b, float_status *s)
2960 {
2961 return soft_f16_compare(a, b, false, s);
2962 }
2963
2964 int float16_compare_quiet(float16 a, float16 b, float_status *s)
2965 {
2966 return soft_f16_compare(a, b, true, s);
2967 }
2968
2969 static int QEMU_FLATTEN
2970 f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2971 {
2972 union_float32 ua, ub;
2973
2974 ua.s = xa;
2975 ub.s = xb;
2976
2977 if (QEMU_NO_HARDFLOAT) {
2978 goto soft;
2979 }
2980
2981 float32_input_flush2(&ua.s, &ub.s, s);
2982 if (isgreaterequal(ua.h, ub.h)) {
2983 if (isgreater(ua.h, ub.h)) {
2984 return float_relation_greater;
2985 }
2986 return float_relation_equal;
2987 }
2988 if (likely(isless(ua.h, ub.h))) {
2989 return float_relation_less;
2990 }
2991 /* The only condition remaining is unordered.
2992 * Fall through to set flags.
2993 */
2994 soft:
2995 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2996 }
2997
2998 int float32_compare(float32 a, float32 b, float_status *s)
2999 {
3000 return f32_compare(a, b, false, s);
3001 }
3002
3003 int float32_compare_quiet(float32 a, float32 b, float_status *s)
3004 {
3005 return f32_compare(a, b, true, s);
3006 }
3007
3008 static int QEMU_FLATTEN
3009 f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3010 {
3011 union_float64 ua, ub;
3012
3013 ua.s = xa;
3014 ub.s = xb;
3015
3016 if (QEMU_NO_HARDFLOAT) {
3017 goto soft;
3018 }
3019
3020 float64_input_flush2(&ua.s, &ub.s, s);
3021 if (isgreaterequal(ua.h, ub.h)) {
3022 if (isgreater(ua.h, ub.h)) {
3023 return float_relation_greater;
3024 }
3025 return float_relation_equal;
3026 }
3027 if (likely(isless(ua.h, ub.h))) {
3028 return float_relation_less;
3029 }
3030 /* The only condition remaining is unordered.
3031 * Fall through to set flags.
3032 */
3033 soft:
3034 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3035 }
3036
3037 int float64_compare(float64 a, float64 b, float_status *s)
3038 {
3039 return f64_compare(a, b, false, s);
3040 }
3041
3042 int float64_compare_quiet(float64 a, float64 b, float_status *s)
3043 {
3044 return f64_compare(a, b, true, s);
3045 }
3046
3047 /* Multiply A by 2 raised to the power N. */
3048 static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3049 {
3050 if (unlikely(is_nan(a.cls))) {
3051 return return_nan(a, s);
3052 }
3053 if (a.cls == float_class_normal) {
3054 /* The largest float type (even though not supported by FloatParts)
3055 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3056 * still allows rounding to infinity, without allowing overflow
3057 * within the int32_t that backs FloatParts.exp.
3058 */
3059 n = MIN(MAX(n, -0x10000), 0x10000);
3060 a.exp += n;
3061 }
3062 return a;
3063 }
3064
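/*
 * Editor's sketch (assumed driver): scalbn only biases the exponent;
 * overflow, underflow and flag raising all happen later, in
 * round_pack_canonical.
 */
static inline float64 example_times_eight(float64 x, float_status *st)
{
    return float64_scalbn(x, 3, st);    /* x * 2^3, correctly rounded */
}
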
3065 float16 float16_scalbn(float16 a, int n, float_status *status)
3066 {
3067 FloatParts pa = float16_unpack_canonical(a, status);
3068 FloatParts pr = scalbn_decomposed(pa, n, status);
3069 return float16_round_pack_canonical(pr, status);
3070 }
3071
3072 float32 float32_scalbn(float32 a, int n, float_status *status)
3073 {
3074 FloatParts pa = float32_unpack_canonical(a, status);
3075 FloatParts pr = scalbn_decomposed(pa, n, status);
3076 return float32_round_pack_canonical(pr, status);
3077 }
3078
3079 float64 float64_scalbn(float64 a, int n, float_status *status)
3080 {
3081 FloatParts pa = float64_unpack_canonical(a, status);
3082 FloatParts pr = scalbn_decomposed(pa, n, status);
3083 return float64_round_pack_canonical(pr, status);
3084 }
3085
3086 /*
3087 * Square Root
3088 *
3089 * The old softfloat code did an approximation step before zeroing in
3090 * on the final result. However, for simplicity we just compute the
3091 * square root by iterating down from the implicit bit to enough extra
3092 * bits to ensure we get a correctly rounded result.
3093 *
3094 * This does mean, however, that the calculation is slower than
3095 * before, especially for 64-bit floats.
3096 */
3097
3098 static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3099 {
3100 uint64_t a_frac, r_frac, s_frac;
3101 int bit, last_bit;
3102
3103 if (is_nan(a.cls)) {
3104 return return_nan(a, s);
3105 }
3106 if (a.cls == float_class_zero) {
3107 return a; /* sqrt(+-0) = +-0 */
3108 }
3109 if (a.sign) {
3110 s->float_exception_flags |= float_flag_invalid;
3111 return parts_default_nan(s);
3112 }
3113 if (a.cls == float_class_inf) {
3114 return a; /* sqrt(+inf) = +inf */
3115 }
3116
3117 assert(a.cls == float_class_normal);
3118
3119 /* We need two overflow bits at the top. Adding room for that is a
3120 * right shift. If the exponent is odd, we can discard the low bit
3121 * by multiplying the fraction by 2; that's a left shift. Combine
3122 * those and we shift right if the exponent is even.
3123 */
3124 a_frac = a.frac;
3125 if (!(a.exp & 1)) {
3126 a_frac >>= 1;
3127 }
3128 a.exp >>= 1;
3129
3130 /* Bit-by-bit computation of sqrt. */
3131 r_frac = 0;
3132 s_frac = 0;
3133
3134 /* Iterate from implicit bit down to the 3 extra bits to compute a
3135 * properly rounded result. Remember we've inserted one more bit
3136 * at the top, so these positions are one less.
3137 */
3138 bit = DECOMPOSED_BINARY_POINT - 1;
3139 last_bit = MAX(p->frac_shift - 4, 0);
3140 do {
3141 uint64_t q = 1ULL << bit;
3142 uint64_t t_frac = s_frac + q;
3143 if (t_frac <= a_frac) {
3144 s_frac = t_frac + q;
3145 a_frac -= t_frac;
3146 r_frac += q;
3147 }
3148 a_frac <<= 1;
3149 } while (--bit >= last_bit);
3150
3151 /* Undo the right shift done above. If there is any remaining
3152 * fraction, the result is inexact. Set the sticky bit.
3153 */
3154 a.frac = (r_frac << 1) + (a_frac != 0);
3155
3156 return a;
3157 }
3158
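/*
 * Editor's worked example (values assumed): a = 9.0 decomposes as
 * frac = 1.125 * 2^62 with exp = 3.  The exponent is odd, so the
 * fraction keeps its extra doubling and exp >>= 1 gives 1; the loop
 * then extracts sqrt(2.25) = 1.5 bit by bit, and the final left shift
 * with an empty remainder gives exactly 1.5 * 2^1 = 3.0, no sticky
 * bit set.
 */
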
3159 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3160 {
3161 FloatParts pa = float16_unpack_canonical(a, status);
3162 FloatParts pr = sqrt_float(pa, status, &float16_params);
3163 return float16_round_pack_canonical(pr, status);
3164 }
3165
3166 static float32 QEMU_SOFTFLOAT_ATTR
3167 soft_f32_sqrt(float32 a, float_status *status)
3168 {
3169 FloatParts pa = float32_unpack_canonical(a, status);
3170 FloatParts pr = sqrt_float(pa, status, &float32_params);
3171 return float32_round_pack_canonical(pr, status);
3172 }
3173
3174 static float64 QEMU_SOFTFLOAT_ATTR
3175 soft_f64_sqrt(float64 a, float_status *status)
3176 {
3177 FloatParts pa = float64_unpack_canonical(a, status);
3178 FloatParts pr = sqrt_float(pa, status, &float64_params);
3179 return float64_round_pack_canonical(pr, status);
3180 }
3181
3182 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3183 {
3184 union_float32 ua, ur;
3185
3186 ua.s = xa;
3187 if (unlikely(!can_use_fpu(s))) {
3188 goto soft;
3189 }
3190
3191 float32_input_flush1(&ua.s, s);
3192 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3193 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3194 fpclassify(ua.h) == FP_ZERO) ||
3195 signbit(ua.h))) {
3196 goto soft;
3197 }
3198 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3199 float32_is_neg(ua.s))) {
3200 goto soft;
3201 }
3202 ur.h = sqrtf(ua.h);
3203 return ur.s;
3204
3205 soft:
3206 return soft_f32_sqrt(ua.s, s);
3207 }
3208
3209 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3210 {
3211 union_float64 ua, ur;
3212
3213 ua.s = xa;
3214 if (unlikely(!can_use_fpu(s))) {
3215 goto soft;
3216 }
3217
3218 float64_input_flush1(&ua.s, s);
3219 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3220 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3221 fpclassify(ua.h) == FP_ZERO) ||
3222 signbit(ua.h))) {
3223 goto soft;
3224 }
3225 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3226 float64_is_neg(ua.s))) {
3227 goto soft;
3228 }
3229 ur.h = sqrt(ua.h);
3230 return ur.s;
3231
3232 soft:
3233 return soft_f64_sqrt(ua.s, s);
3234 }
3235
3236 /*----------------------------------------------------------------------------
3237 | The pattern for a default generated NaN.
3238 *----------------------------------------------------------------------------*/
3239
3240 float16 float16_default_nan(float_status *status)
3241 {
3242 FloatParts p = parts_default_nan(status);
3243 p.frac >>= float16_params.frac_shift;
3244 return float16_pack_raw(p);
3245 }
3246
3247 float32 float32_default_nan(float_status *status)
3248 {
3249 FloatParts p = parts_default_nan(status);
3250 p.frac >>= float32_params.frac_shift;
3251 return float32_pack_raw(p);
3252 }
3253
3254 float64 float64_default_nan(float_status *status)
3255 {
3256 FloatParts p = parts_default_nan(status);
3257 p.frac >>= float64_params.frac_shift;
3258 return float64_pack_raw(p);
3259 }
3260
3261 float128 float128_default_nan(float_status *status)
3262 {
3263 FloatParts p = parts_default_nan(status);
3264 float128 r;
3265
3266 /* Extrapolate from the choices made by parts_default_nan to fill
3267 * in the quad floating-point format. If the low bit is set, assume we
3268 * want to set all non-snan bits.
3269 */
3270 r.low = -(p.frac & 1);
3271 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3272 r.high |= LIT64(0x7FFF000000000000);
3273 r.high |= (uint64_t)p.sign << 63;
3274
3275 return r;
3276 }
3277
3278 /*----------------------------------------------------------------------------
3279 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3280 *----------------------------------------------------------------------------*/
3281
3282 float16 float16_silence_nan(float16 a, float_status *status)
3283 {
3284 FloatParts p = float16_unpack_raw(a);
3285 p.frac <<= float16_params.frac_shift;
3286 p = parts_silence_nan(p, status);
3287 p.frac >>= float16_params.frac_shift;
3288 return float16_pack_raw(p);
3289 }
3290
3291 float32 float32_silence_nan(float32 a, float_status *status)
3292 {
3293 FloatParts p = float32_unpack_raw(a);
3294 p.frac <<= float32_params.frac_shift;
3295 p = parts_silence_nan(p, status);
3296 p.frac >>= float32_params.frac_shift;
3297 return float32_pack_raw(p);
3298 }
3299
3300 float64 float64_silence_nan(float64 a, float_status *status)
3301 {
3302 FloatParts p = float64_unpack_raw(a);
3303 p.frac <<= float64_params.frac_shift;
3304 p = parts_silence_nan(p, status);
3305 p.frac >>= float64_params.frac_shift;
3306 return float64_pack_raw(p);
3307 }
3308
3309 /*----------------------------------------------------------------------------
3310 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3311 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3312 | input. If `zSign' is 1, the input is negated before being converted to an
3313 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3314 | is simply rounded to an integer, with the inexact exception raised if the
3315 | input cannot be represented exactly as an integer. However, if the fixed-
3316 | point input is too large, the invalid exception is raised and the largest
3317 | positive or negative integer is returned.
3318 *----------------------------------------------------------------------------*/
3319
3320 static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
3321 {
3322 int8_t roundingMode;
3323 flag roundNearestEven;
3324 int8_t roundIncrement, roundBits;
3325 int32_t z;
3326
3327 roundingMode = status->float_rounding_mode;
3328 roundNearestEven = ( roundingMode == float_round_nearest_even );
3329 switch (roundingMode) {
3330 case float_round_nearest_even:
3331 case float_round_ties_away:
3332 roundIncrement = 0x40;
3333 break;
3334 case float_round_to_zero:
3335 roundIncrement = 0;
3336 break;
3337 case float_round_up:
3338 roundIncrement = zSign ? 0 : 0x7f;
3339 break;
3340 case float_round_down:
3341 roundIncrement = zSign ? 0x7f : 0;
3342 break;
3343 case float_round_to_odd:
3344 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3345 break;
3346 default:
3347 abort();
3348 }
3349 roundBits = absZ & 0x7F;
3350 absZ = ( absZ + roundIncrement )>>7;
3351 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3352 z = absZ;
3353 if ( zSign ) z = - z;
3354 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
3355 float_raise(float_flag_invalid, status);
3356 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3357 }
3358 if (roundBits) {
3359 status->float_exception_flags |= float_flag_inexact;
3360 }
3361 return z;
3362
3363 }
3364
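/*
 * Editor's sketch of the Q7 nearest-even core above (assumed helper,
 * not part of the original file): absZ carries 7 fraction bits, 0x40
 * is the half step, and an exact tie (roundBits == 0x40) has bit 0
 * cleared afterwards so it lands on the even integer.  E.g. 3.5
 * (0x1C0) stays at 4 while 2.5 (0x140) drops to 2.
 */
static inline int32_t round_q7_nearest_even_sketch(uint64_t absZ)
{
    int roundBits = absZ & 0x7f;

    absZ = (absZ + 0x40) >> 7;
    absZ &= ~(uint64_t)(roundBits == 0x40);   /* force even on ties */
    return absZ;
}
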
3365 /*----------------------------------------------------------------------------
3366 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3367 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3368 | and returns the properly rounded 64-bit integer corresponding to the input.
3369 | If `zSign' is 1, the input is negated before being converted to an integer.
3370 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3371 | the inexact exception raised if the input cannot be represented exactly as
3372 | an integer. However, if the fixed-point input is too large, the invalid
3373 | exception is raised and the largest positive or negative integer is
3374 | returned.
3375 *----------------------------------------------------------------------------*/
3376
3377 static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
3378 float_status *status)
3379 {
3380 int8_t roundingMode;
3381 flag roundNearestEven, increment;
3382 int64_t z;
3383
3384 roundingMode = status->float_rounding_mode;