colo-compare: Remove superfluous NULL-pointer checks for s->iothread
[qemu.git] / fpu / softfloat.c
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Flush a denormal input to a zero of the same sign and raise
 * float_flag_input_denormal.  "__NOCHECK" means the caller is
 * responsible for having tested s->flush_inputs_to_zero first.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            s->float_exception_flags |= float_flag_input_denormal;      \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * One-operand input flush: no-op unless the status requests
 * flush_inputs_to_zero, then defers to the __nocheck variant.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155
/* Two-operand variant of GEN_INPUT_FLUSH1: flushes both a and b. */
#define GEN_INPUT_FLUSH2(name, soft_t)                              \
    static inline void name(soft_t *a, soft_t *b, float_status *s)  \
    {                                                               \
        if (likely(!s->flush_inputs_to_zero)) {                     \
            return;                                                 \
        }                                                           \
        soft_t ## _input_flush__nocheck(a, s);                      \
        soft_t ## _input_flush__nocheck(b, s);                      \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169
/* Three-operand variant (for fused multiply-add): flushes a, b and c. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                      \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                       \
        if (likely(!s->flush_inputs_to_zero)) {                             \
            return;                                                         \
        }                                                                   \
        soft_t ## _input_flush__nocheck(a, s);                              \
        soft_t ## _input_flush__nocheck(b, s);                              \
        soft_t ## _input_flush__nocheck(c, s);                              \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
185 /*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205
206 /*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF 0
216 #endif
217
218 /*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
/*
 * Dual views of one value: 's' is the softfloat bit-pattern type,
 * 'h' is the host native type used on the hardfloat fast path.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicate over two operands (pre/post checks for the fast path). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat fallback and host-FPU implementations of a 2-input op. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
/* 2-input is-zero-or-normal: true iff both operands are zero or normal. */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}
288
/* Double-precision counterpart of f32_is_zon2. */
static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        /* Consecutive fpclassify calls without a temp generate faster code. */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}
298
299 /* 3-input is-zero-or-normal */
/* 3-input is-zero-or-normal (used by fused multiply-add fast paths). */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}
312
/* Double-precision counterpart of f32_is_zon3. */
static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
/*
 * Generic 2-input single-precision operation: try the host FPU ('hard'),
 * falling back to softfloat ('soft') whenever exception detection would
 * be unreliable.  'pre' vets the inputs (e.g. zero-or-normal); 'post'
 * decides whether a tiny result needs the slow path for exact
 * underflow/inexact reporting.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Wrong rounding mode or inexact not yet set: must use softfloat. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* Finite, vetted inputs that produce inf must have overflowed. */
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the smallest normal: recompute in softfloat
         * so underflow/inexact flags come out exactly right. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372
/*
 * Double-precision counterpart of float32_gen2: host-FPU fast path with
 * softfloat fallback for inputs/results whose flags we cannot derive
 * cheaply.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Possible underflow: let softfloat compute the exact flags. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
/*
 * Classification of a decomposed float.  The ordering matters: every
 * value >= float_class_qnan is some kind of NaN, which is what the
 * is_nan() helper below relies on.  Packed so FloatParts stays small
 * enough to pass in registers.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
472 /* Simple helpers for checking if, or what kind of, NaN we have */
/* Simple helpers for checking if, or what kind of, NaN we have */

/* True for either NaN kind; relies on qnan/snan being last in FloatClass. */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

/* True only for a signaling NaN. */
static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

/* True only for a quiet NaN. */
static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
487
488 /*
489 * Structure holding all of the decomposed parts of a float. The
490 * exponent is unbiased and the fraction is normalized. All
491 * calculations are done with a 64 bit fraction and then rounded as
492 * appropriate for the final format.
493 *
494 * Thanks to the packed FloatClass a decent compiler should be able to
495 * fit the whole structure into registers and avoid using the stack
496 * for parameter passing.
497 */
498
typedef struct {
    uint64_t frac;    /* normalized fraction, binary point below bit 62 */
    int32_t exp;      /* unbiased exponent */
    FloatClass cls;
    bool sign;
} FloatParts;

/*
 * Fraction layout: the implicit (integer) bit sits at bit 62, leaving
 * bit 63 free to catch a carry-out during addition/rounding.
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
509
510 /* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based the size of fraction
517 * frac_lsb: least significant bit of fraction
518 * frac_lsbm1: the bit below the least significant bit (for rounding)
519 * round_mask/roundeven_mask: masks used for rounding
520 * The following optional modifiers are available:
521 * arm_althp: handle ARM Alternative Half Precision
522 */
typedef struct {
    int exp_size;              /* bits in the exponent field */
    int exp_bias;              /* bias applied to the stored exponent */
    int exp_max;               /* all-ones exponent (inf/NaN encoding) */
    int frac_size;             /* bits in the fraction field */
    int frac_shift;            /* shift aligning frac to the binary point */
    uint64_t frac_lsb;         /* least significant fraction bit */
    uint64_t frac_lsbm1;       /* bit below the lsb (rounding increment) */
    uint64_t round_mask;       /* bits discarded by rounding */
    uint64_t roundeven_mask;   /* round_mask plus the lsb (ties-to-even) */
    bool arm_althp;            /* ARM Alternative Half Precision mode */
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
547
/* IEEE binary16: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 layout with ARM Alternative Half Precision semantics. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* IEEE binary32: 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64: 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
564
565 /* Unpack a float to parts, but do not canonicalize. */
566 static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
567 {
568 const int sign_pos = fmt.frac_size + fmt.exp_size;
569
570 return (FloatParts) {
571 .cls = float_class_unclassified,
572 .sign = extract64(raw, sign_pos, 1),
573 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
574 .frac = extract64(raw, 0, fmt.frac_size),
575 };
576 }
577
/* Per-format wrappers binding unpack_raw to the right parameter table. */
static inline FloatParts float16_unpack_raw(float16 f)
{
    return unpack_raw(float16_params, f);
}

static inline FloatParts float32_unpack_raw(float32 f)
{
    return unpack_raw(float32_params, f);
}

static inline FloatParts float64_unpack_raw(float64 f)
{
    return unpack_raw(float64_params, f);
}
592
593 /* Pack a float from parts, but do not canonicalize. */
594 static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
595 {
596 const int sign_pos = fmt.frac_size + fmt.exp_size;
597 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
598 return deposit64(ret, sign_pos, 1, p.sign);
599 }
600
/* Per-format wrappers binding pack_raw to the right parameter table. */
static inline float16 float16_pack_raw(FloatParts p)
{
    return make_float16(pack_raw(float16_params, p));
}

static inline float32 float32_pack_raw(FloatParts p)
{
    return make_float32(pack_raw(float32_params, p));
}

static inline float64 float64_pack_raw(FloatParts p)
{
    return make_float64(pack_raw(float64_params, p));
}
615
616 /*----------------------------------------------------------------------------
617 | Functions and definitions to determine: (1) whether tininess for underflow
618 | is detected before or after rounding by default, (2) what (if anything)
619 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
620 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
621 | are propagated from function inputs to output. These details are target-
622 | specific.
623 *----------------------------------------------------------------------------*/
624 #include "softfloat-specialize.inc.c"
625
626 /* Canonicalize EXP and FRAC, setting CLS. */
/*
 * Canonicalize EXP and FRAC, setting CLS.  On return the exponent is
 * unbiased and, for normals, the fraction is normalized with the
 * implicit bit at DECOMPOSED_BINARY_POINT.  May raise
 * float_flag_input_denormal when flushing a denormal input.
 */
static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
                                  float_status *status)
{
    if (part.exp == parm->exp_max && !parm->arm_althp) {
        /* All-ones exponent encodes inf (frac == 0) or a NaN. */
        if (part.frac == 0) {
            part.cls = float_class_inf;
        } else {
            part.frac <<= parm->frac_shift;
            part.cls = (parts_is_snan_frac(part.frac, status)
                        ? float_class_snan : float_class_qnan);
        }
    } else if (part.exp == 0) {
        if (likely(part.frac == 0)) {
            part.cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Denormal input treated as (signed) zero. */
            float_raise(float_flag_input_denormal, status);
            part.cls = float_class_zero;
            part.frac = 0;
        } else {
            /* Denormal: normalize by shifting the msb up to bit 62. */
            int shift = clz64(part.frac) - 1;
            part.cls = float_class_normal;
            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
            part.frac <<= shift;
        }
    } else {
        /* Normal number: unbias exponent, restore the implicit bit. */
        part.cls = float_class_normal;
        part.exp -= parm->exp_bias;
        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
    }
    return part;
}
658
659 /* Round and uncanonicalize a floating-point number by parts. There
660 * are FRAC_SHIFT bits that may require rounding at the bottom of the
661 * fraction; these bits will be removed. The exponent will be biased
662 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
663 */
664
/* Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 *
 * Raises inexact/overflow/underflow/output_denormal (and, for ARM
 * alt-HP overflow, invalid) as appropriate via float_raise().
 */

static FloatParts round_canonical(FloatParts p, float_status *s,
                                  const FloatFmt *parm)
{
    const uint64_t frac_lsb = parm->frac_lsb;
    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
    const uint64_t round_mask = parm->round_mask;
    const uint64_t roundeven_mask = parm->roundeven_mask;
    const int exp_max = parm->exp_max;
    const int frac_shift = parm->frac_shift;
    uint64_t frac, inc;
    int exp, flags = 0;
    bool overflow_norm;

    frac = p.frac;
    exp = p.exp;

    switch (p.cls) {
    case float_class_normal:
        /*
         * Pick the rounding increment 'inc' and whether overflow should
         * saturate to the maximum normal (overflow_norm) rather than
         * produce infinity, per the active rounding mode.
         */
        switch (s->float_rounding_mode) {
        case float_round_nearest_even:
            overflow_norm = false;
            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
            break;
        case float_round_ties_away:
            overflow_norm = false;
            inc = frac_lsbm1;
            break;
        case float_round_to_zero:
            overflow_norm = true;
            inc = 0;
            break;
        case float_round_up:
            inc = p.sign ? 0 : round_mask;
            overflow_norm = p.sign;
            break;
        case float_round_down:
            inc = p.sign ? round_mask : 0;
            overflow_norm = !p.sign;
            break;
        case float_round_to_odd:
            overflow_norm = true;
            inc = frac & frac_lsb ? 0 : round_mask;
            break;
        default:
            g_assert_not_reached();
        }

        exp += parm->exp_bias;
        if (likely(exp > 0)) {
            /* Normal result: round, renormalize on carry-out. */
            if (frac & round_mask) {
                flags |= float_flag_inexact;
                frac += inc;
                if (frac & DECOMPOSED_OVERFLOW_BIT) {
                    frac >>= 1;
                    exp++;
                }
            }
            frac >>= frac_shift;

            if (parm->arm_althp) {
                /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
                if (unlikely(exp > exp_max)) {
                    /* Overflow.  Return the maximum normal.  Note flags
                     * is overwritten (not OR-ed) with invalid here. */
                    flags = float_flag_invalid;
                    exp = exp_max;
                    frac = -1;
                }
            } else if (unlikely(exp >= exp_max)) {
                flags |= float_flag_overflow | float_flag_inexact;
                if (overflow_norm) {
                    /* Saturate to largest finite value. */
                    exp = exp_max - 1;
                    frac = -1;
                } else {
                    p.cls = float_class_inf;
                    goto do_inf;
                }
            }
        } else if (s->flush_to_zero) {
            /* Result would be denormal: flush output to zero. */
            flags |= float_flag_output_denormal;
            p.cls = float_class_zero;
            goto do_zero;
        } else {
            /*
             * Denormal result.  Tininess is detected either before
             * rounding (if the status says so) or because rounding at
             * full precision would not have reached the implicit bit.
             */
            bool is_tiny = s->tininess_before_rounding
                        || (exp < 0)
                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);

            /* Denormalize, then redo rounding at the new position. */
            shift64RightJamming(frac, 1 - exp, &frac);
            if (frac & round_mask) {
                /* Need to recompute round-to-even. */
                switch (s->float_rounding_mode) {
                case float_round_nearest_even:
                    inc = ((frac & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                    break;
                case float_round_to_odd:
                    inc = frac & frac_lsb ? 0 : round_mask;
                    break;
                default:
                    break;
                }
                flags |= float_flag_inexact;
                frac += inc;
            }

            /* Rounding may have promoted the value back to normal. */
            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
            frac >>= frac_shift;

            if (is_tiny && (flags & float_flag_inexact)) {
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac == 0) {
                p.cls = float_class_zero;
            }
        }
        break;

    case float_class_zero:
    do_zero:
        exp = 0;
        frac = 0;
        break;

    case float_class_inf:
    do_inf:
        assert(!parm->arm_althp);
        exp = exp_max;
        frac = 0;
        break;

    case float_class_qnan:
    case float_class_snan:
        assert(!parm->arm_althp);
        exp = exp_max;
        frac >>= parm->frac_shift;
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    p.exp = exp;
    p.frac = frac;
    return p;
}
810
811 /* Explicit FloatFmt version */
/* Explicit FloatFmt version (lets callers pass the ahp variant). */
static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
                                            const FloatFmt *params)
{
    return sf_canonicalize(float16_unpack_raw(f), params, s);
}

/* Unpack an IEEE binary16 into canonical parts. */
static FloatParts float16_unpack_canonical(float16 f, float_status *s)
{
    return float16a_unpack_canonical(f, s, &float16_params);
}

/* Round parts and pack as binary16 using an explicit format. */
static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
                                             const FloatFmt *params)
{
    return float16_pack_raw(round_canonical(p, s, params));
}

static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* binary32 unpack/pack counterparts. */
static FloatParts float32_unpack_canonical(float32 f, float_status *s)
{
    return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
}

static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
{
    return float32_pack_raw(round_canonical(p, s, &float32_params));
}

/* binary64 unpack/pack counterparts. */
static FloatParts float64_unpack_canonical(float64 f, float_status *s)
{
    return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
}

static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
{
    return float64_pack_raw(round_canonical(p, s, &float64_params));
}
853
/*
 * Propagate a single NaN operand: silence an sNaN (raising invalid),
 * and substitute the default NaN when the status requests it.
 * 'a' must already be classified as a NaN.
 */
static FloatParts return_nan(FloatParts a, float_status *s)
{
    switch (a.cls) {
    case float_class_snan:
        s->float_exception_flags |= float_flag_invalid;
        a = parts_silence_nan(a, s);
        /* fall through */
    case float_class_qnan:
        if (s->default_nan_mode) {
            return parts_default_nan(s);
        }
        break;

    default:
        g_assert_not_reached();
    }
    return a;
}
872
/*
 * Select which of two NaN operands a 2-input operation propagates,
 * per the target's pickNaN() rules, silencing sNaNs and raising
 * invalid for them.  At least one of a/b must be a NaN.
 */
static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
{
    if (is_snan(a.cls) || is_snan(b.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    if (s->default_nan_mode) {
        return parts_default_nan(s);
    } else {
        /* The bool argument tells pickNaN which operand is "larger",
         * used by targets that propagate the larger significand. */
        if (pickNaN(a.cls, b.cls,
                    a.frac > b.frac ||
                    (a.frac == b.frac && a.sign < b.sign))) {
            a = b;
        }
        if (is_snan(a.cls)) {
            return parts_silence_nan(a, s);
        }
    }
    return a;
}
893
/*
 * Three-operand NaN selection for fused multiply-add.  'inf_zero'
 * flags the invalid inf*0 case.  pickNaNMulAdd returns which operand
 * (0=a, 1=b, 2=c, 3=default NaN) to propagate.
 */
static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
                                  bool inf_zero, float_status *s)
{
    int which;

    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);

    if (s->default_nan_mode) {
        /* Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag.
         */
        which = 3;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    case 3:
        return parts_default_nan(s);
    default:
        g_assert_not_reached();
    }

    if (is_snan(a.cls)) {
        return parts_silence_nan(a, s);
    }
    return a;
}
932
933 /*
934 * Returns the result of adding or subtracting the values of the
935 * floating-point values `a' and `b'. The operation is performed
936 * according to the IEC/IEEE Standard for Binary Floating-Point
937 * Arithmetic.
938 */
939
/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * 'subtract' folds b's sign so one routine handles both ops; the
 * effective operation (same signs = magnitude addition, differing
 * signs = magnitude subtraction) is chosen from the folded signs.
 */

static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
                                float_status *s)
{
    bool a_sign = a.sign;
    bool b_sign = b.sign ^ subtract;

    if (a_sign != b_sign) {
        /* Subtraction */

        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Subtract the smaller magnitude from the larger, aligning
             * exponents with a sticky (jamming) right shift. */
            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
                a.frac = a.frac - b.frac;
            } else {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.frac = b.frac - a.frac;
                a.exp = b.exp;
                a_sign ^= 1;    /* result takes the larger operand's sign */
            }

            if (a.frac == 0) {
                /* Exact cancellation: zero is -0 only in round-down. */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
            } else {
                /* Renormalize after possible massive cancellation. */
                int shift = clz64(a.frac) - 1;
                a.frac = a.frac << shift;
                a.exp = a.exp - shift;
                a.sign = a_sign;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf) {
            if (b.cls == float_class_inf) {
                /* inf - inf is invalid. */
                float_raise(float_flag_invalid, s);
                return parts_default_nan(s);
            }
            return a;
        }
        if (a.cls == float_class_zero && b.cls == float_class_zero) {
            a.sign = s->float_rounding_mode == float_round_down;
            return a;
        }
        if (a.cls == float_class_zero || b.cls == float_class_inf) {
            /* Result is b with its (effective) sign flipped to a_sign^1. */
            b.sign = a_sign ^ 1;
            return b;
        }
        if (b.cls == float_class_zero) {
            return a;
        }
    } else {
        /* Addition */
        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Align to the larger exponent, then add magnitudes. */
            if (a.exp > b.exp) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
            } else if (a.exp < b.exp) {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.exp = b.exp;
            }
            a.frac += b.frac;
            if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                /* Carry-out: shift back down (sticky) and bump exponent. */
                shift64RightJamming(a.frac, 1, &a.frac);
                a.exp += 1;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf || b.cls == float_class_zero) {
            return a;
        }
        if (b.cls == float_class_inf || a.cls == float_class_zero) {
            b.sign = b_sign;
            return b;
        }
    }
    /* All class combinations are handled above. */
    g_assert_not_reached();
}
1021
1022 /*
1023 * Returns the result of adding or subtracting the floating-point
1024 * values `a' and `b'. The operation is performed according to the
1025 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1026 */
1027
1028 float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
1029 {
1030 FloatParts pa = float16_unpack_canonical(a, status);
1031 FloatParts pb = float16_unpack_canonical(b, status);
1032 FloatParts pr = addsub_floats(pa, pb, false, status);
1033
1034 return float16_round_pack_canonical(pr, status);
1035 }
1036
1037 float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1038 {
1039 FloatParts pa = float16_unpack_canonical(a, status);
1040 FloatParts pb = float16_unpack_canonical(b, status);
1041 FloatParts pr = addsub_floats(pa, pb, true, status);
1042
1043 return float16_round_pack_canonical(pr, status);
1044 }
1045
1046 static float32 QEMU_SOFTFLOAT_ATTR
1047 soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
1048 {
1049 FloatParts pa = float32_unpack_canonical(a, status);
1050 FloatParts pb = float32_unpack_canonical(b, status);
1051 FloatParts pr = addsub_floats(pa, pb, subtract, status);
1052
1053 return float32_round_pack_canonical(pr, status);
1054 }
1055
1056 static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1057 {
1058 return soft_f32_addsub(a, b, false, status);
1059 }
1060
1061 static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1062 {
1063 return soft_f32_addsub(a, b, true, status);
1064 }
1065
1066 static float64 QEMU_SOFTFLOAT_ATTR
1067 soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
1068 {
1069 FloatParts pa = float64_unpack_canonical(a, status);
1070 FloatParts pb = float64_unpack_canonical(b, status);
1071 FloatParts pr = addsub_floats(pa, pb, subtract, status);
1072
1073 return float64_round_pack_canonical(pr, status);
1074 }
1075
1076 static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
1077 {
1078 return soft_f64_addsub(a, b, false, status);
1079 }
1080
1081 static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1082 {
1083 return soft_f64_addsub(a, b, true, status);
1084 }
1085
static float hard_f32_add(float a, float b)
{
    /* Host-FPU single-precision addition for the hardfloat fast path. */
    float sum = a + b;
    return sum;
}
1090
static float hard_f32_sub(float a, float b)
{
    /* Host-FPU single-precision subtraction for the hardfloat fast path. */
    float diff = a - b;
    return diff;
}
1095
static double hard_f64_add(double a, double b)
{
    /* Host-FPU double-precision addition for the hardfloat fast path. */
    double sum = a + b;
    return sum;
}
1100
static double hard_f64_sub(double a, double b)
{
    /* Host-FPU double-precision subtraction for the hardfloat fast path. */
    double diff = a - b;
    return diff;
}
1105
1106 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1107 {
1108 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1109 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1110 }
1111 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1112 }
1113
1114 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1115 {
1116 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1117 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1118 } else {
1119 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1120 }
1121 }
1122
1123 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1124 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1125 {
1126 return float32_gen2(a, b, s, hard, soft,
1127 f32_is_zon2, f32_addsubmul_post);
1128 }
1129
1130 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1131 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1132 {
1133 return float64_gen2(a, b, s, hard, soft,
1134 f64_is_zon2, f64_addsubmul_post);
1135 }
1136
1137 float32 QEMU_FLATTEN
1138 float32_add(float32 a, float32 b, float_status *s)
1139 {
1140 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1141 }
1142
1143 float32 QEMU_FLATTEN
1144 float32_sub(float32 a, float32 b, float_status *s)
1145 {
1146 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1147 }
1148
1149 float64 QEMU_FLATTEN
1150 float64_add(float64 a, float64 b, float_status *s)
1151 {
1152 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1153 }
1154
1155 float64 QEMU_FLATTEN
1156 float64_sub(float64 a, float64 b, float_status *s)
1157 {
1158 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1159 }
1160
1161 /*
1162 * Returns the result of multiplying the floating-point values `a' and
1163 * `b'. The operation is performed according to the IEC/IEEE Standard
1164 * for Binary Floating-Point Arithmetic.
1165 */
1166
/*
 * Core multiply on canonical (decomposed) parts.  For two normals the
 * 64x64 fraction product is computed in 128 bits, shifted back to the
 * decomposed binary point with sticky-bit jamming, and renormalized by
 * at most one bit.  Special values follow IEC/IEEE 754, with Inf * 0
 * raising Invalid and producing the default NaN.
 */
static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t hi, lo;
        int exp = a.exp + b.exp;

        mul64To128(a.frac, b.frac, &hi, &lo);
        /* Move the 128-bit product's binary point back to bit 62;
         * jamming keeps a sticky bit so rounding stays correct.
         */
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
        if (lo & DECOMPOSED_OVERFLOW_BIT) {
            shift64RightJamming(lo, 1, &lo);
            exp += 1;
        }

        /* Re-use a */
        a.exp = exp;
        a.sign = sign;
        a.frac = lo;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* Inf * Zero == NaN */
    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
        (a.cls == float_class_zero && b.cls == float_class_inf)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Multiply by 0 or Inf */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    if (b.cls == float_class_inf || b.cls == float_class_zero) {
        b.sign = sign;
        return b;
    }
    g_assert_not_reached();
}
1209
1210 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1211 {
1212 FloatParts pa = float16_unpack_canonical(a, status);
1213 FloatParts pb = float16_unpack_canonical(b, status);
1214 FloatParts pr = mul_floats(pa, pb, status);
1215
1216 return float16_round_pack_canonical(pr, status);
1217 }
1218
1219 static float32 QEMU_SOFTFLOAT_ATTR
1220 soft_f32_mul(float32 a, float32 b, float_status *status)
1221 {
1222 FloatParts pa = float32_unpack_canonical(a, status);
1223 FloatParts pb = float32_unpack_canonical(b, status);
1224 FloatParts pr = mul_floats(pa, pb, status);
1225
1226 return float32_round_pack_canonical(pr, status);
1227 }
1228
1229 static float64 QEMU_SOFTFLOAT_ATTR
1230 soft_f64_mul(float64 a, float64 b, float_status *status)
1231 {
1232 FloatParts pa = float64_unpack_canonical(a, status);
1233 FloatParts pb = float64_unpack_canonical(b, status);
1234 FloatParts pr = mul_floats(pa, pb, status);
1235
1236 return float64_round_pack_canonical(pr, status);
1237 }
1238
static float hard_f32_mul(float a, float b)
{
    /* Host-FPU single-precision multiply for the hardfloat fast path. */
    float prod = a * b;
    return prod;
}
1243
static double hard_f64_mul(double a, double b)
{
    /* Host-FPU double-precision multiply for the hardfloat fast path. */
    double prod = a * b;
    return prod;
}
1248
1249 float32 QEMU_FLATTEN
1250 float32_mul(float32 a, float32 b, float_status *s)
1251 {
1252 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1253 f32_is_zon2, f32_addsubmul_post);
1254 }
1255
1256 float64 QEMU_FLATTEN
1257 float64_mul(float64 a, float64 b, float_status *s)
1258 {
1259 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1260 f64_is_zon2, f64_addsubmul_post);
1261 }
1262
1263 /*
1264 * Returns the result of multiplying the floating-point values `a' and
1265 * `b' then adding 'c', with no intermediate rounding step after the
1266 * multiplication. The operation is performed according to the
1267 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1268 * The flags argument allows the caller to select negation of the
1269 * addend, the intermediate product, or the final result. (The
1270 * difference between this and having the caller do a separate
1271 * negation is that negating externally will flip the sign bit on
1272 * NaNs.)
1273 */
1274
/*
 * Core fused multiply-add on canonical parts: (a * b) + c with a single
 * rounding at the end.  `flags' (float_muladd_*) selects negation of c,
 * of the product, or of the final result, and optional halving.  The
 * product is kept in a full 128-bit intermediate so no precision is
 * lost before the addend is folded in.
 */
static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
                                int flags, float_status *s)
{
    /* True iff the product operands are exactly one Inf and one zero,
     * in either order (set membership via a 2-bit class mask).
     */
    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
                    ((1 << float_class_inf) | (1 << float_class_zero));
    bool p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
        return pick_nan_muladd(a, b, c, inf_zero, s);
    }

    /* Inf * 0 with no NaN inputs: always invalid, default NaN. */
    if (inf_zero) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    /* Classify the product a * b without computing it. */
    if (a.cls == float_class_inf || b.cls == float_class_inf) {
        p_class = float_class_inf;
    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        /* Inf + (-Inf) is invalid; otherwise the Inf addend dominates. */
        if (p_class == float_class_inf && p_sign != c.sign) {
            s->float_exception_flags |= float_flag_invalid;
            return parts_default_nan(s);
        } else {
            a.cls = float_class_inf;
            a.sign = c.sign ^ sign_flip;
            return a;
        }
    }

    if (p_class == float_class_inf) {
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        /* Zero product: the result is the addend, except that 0 + (-0)
         * takes its sign from the rounding mode.
         */
        if (c.cls == float_class_zero) {
            if (p_sign != c.sign) {
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
     * result.
     */
    mul64To128(a.frac, b.frac, &hi, &lo);
    /* binary point now at bit 124 */

    /* check for overflow */
    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
        shift128RightJamming(hi, lo, 1, &hi, &lo);
        p_exp += 1;
    }

    /* + add/sub */
    if (c.cls == float_class_zero) {
        /* move binary point back to 62 */
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
    } else {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                /* Addend is at least as large as the product: shift the
                 * product down to the addend's scale and add in 64 bits.
                 */
                shift128RightJamming(hi, lo,
                                     DECOMPOSED_BINARY_POINT - exp_diff,
                                     &hi, &lo);
                lo += c.frac;
                p_exp = c.exp;
            } else {
                uint64_t c_hi, c_lo;
                /* shift c to the same binary point as the product (124) */
                c_hi = c.frac >> 2;
                c_lo = 0;
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                add128(hi, lo, c_hi, c_lo, &hi, &lo);
                /* move binary point back to 62 */
                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
            }

            if (lo & DECOMPOSED_OVERFLOW_BIT) {
                shift64RightJamming(lo, 1, &lo);
                p_exp += 1;
            }

        } else {
            /* Subtraction */
            uint64_t c_hi, c_lo;
            /* make C binary point match product at bit 124 */
            c_hi = c.frac >> 2;
            c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                /* Subtract the smaller magnitude from the larger; if the
                 * addend wins, the sign flips and its exponent is kept.
                 */
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                /* Exact cancellation: zero's sign follows the rounding
                 * mode, then the result-negation flag.
                 */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent.  However since we're
                   shifting, we might as well put the binary point back
                   at 62 where we really want it.  Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits.  */
                shift -= 1;
                if (shift >= 64) {
                    lo = lo << (shift - 64);
                } else {
                    /* Fold the low word's remaining bits into a sticky
                     * lsb so later rounding still sees inexactness.
                     */
                    hi = (hi << shift) | (lo >> (64 - shift));
                    lo = hi | ((lo << shift) != 0);
                }
                p_exp -= shift - 2;
            }
        }
    }

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = lo;

    return a;
}
1465
1466 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1467 int flags, float_status *status)
1468 {
1469 FloatParts pa = float16_unpack_canonical(a, status);
1470 FloatParts pb = float16_unpack_canonical(b, status);
1471 FloatParts pc = float16_unpack_canonical(c, status);
1472 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1473
1474 return float16_round_pack_canonical(pr, status);
1475 }
1476
1477 static float32 QEMU_SOFTFLOAT_ATTR
1478 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1479 float_status *status)
1480 {
1481 FloatParts pa = float32_unpack_canonical(a, status);
1482 FloatParts pb = float32_unpack_canonical(b, status);
1483 FloatParts pc = float32_unpack_canonical(c, status);
1484 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1485
1486 return float32_round_pack_canonical(pr, status);
1487 }
1488
1489 static float64 QEMU_SOFTFLOAT_ATTR
1490 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1491 float_status *status)
1492 {
1493 FloatParts pa = float64_unpack_canonical(a, status);
1494 FloatParts pb = float64_unpack_canonical(b, status);
1495 FloatParts pc = float64_unpack_canonical(c, status);
1496 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1497
1498 return float64_round_pack_canonical(pr, status);
1499 }
1500
/* When true, float32_muladd/float64_muladd below always take the
 * softfloat path instead of the host fma.  Never set in this file's
 * visible code; presumably toggled elsewhere for testing/benchmarking
 * — TODO confirm.
 */
static bool force_soft_fma;
1502
/*
 * Single-precision fused multiply-add with a host-FPU fast path.
 * Falls back to soft_f32_muladd whenever correct flag/rounding
 * behavior cannot be guaranteed by the host fmaf().
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU usable only when status config matches host semantics. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving is not expressible via the host fma; softfloat only. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    /* All inputs must be zero-or-normal for the fast path. */
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build a correctly-signed zero product, then add the addend. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Result may have underflowed; redo in softfloat with the
             * original (pre-negation) operands for correct flags.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1573
/*
 * Double-precision fused multiply-add with a host-FPU fast path.
 * Falls back to soft_f64_muladd whenever correct flag/rounding
 * behavior cannot be guaranteed by the host fma().
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* Host FPU usable only when status config matches host semantics. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving is not expressible via the host fma; softfloat only. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    /* All inputs must be zero-or-normal for the fast path. */
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Build a correctly-signed zero product, then add the addend. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /* NOTE(review): this compares against FLT_MIN, not DBL_MIN.
             * That is conservative (any result below FLT_MIN is redone
             * in softfloat, so behavior stays correct) but falls back
             * more often than strictly needed — confirm intent before
             * changing.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1644
1645 /*
1646 * Returns the result of dividing the floating-point value `a' by the
1647 * corresponding value `b'. The operation is performed according to
1648 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1649 */
1650
/*
 * Core divide on canonical (decomposed) parts.  For two normals a
 * 128/64-bit division (udiv_qrnnd) produces an exact 64-bit quotient;
 * a non-zero remainder is folded into the quotient's lsb as a sticky
 * bit for later rounding.  Special values follow IEC/IEEE 754: 0/0 and
 * Inf/Inf are invalid, x/0 raises divide-by-zero, x/Inf is zero.
 */
static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
{
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set.  Since we know that
         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
         * by one (more), and the remainder must be shifted right by one.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);

        /*
         * Set lsb if there is a remainder, to set inexact.
         * As mentioned above, to find the actual value of the remainder we
         * would need to shift right, but (1) we are only concerned about
         * non-zero-ness, and (2) the remainder will always be even because
         * both inputs to the division primitive are even.
         */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        s->float_exception_flags |= float_flag_divbyzero;
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    g_assert_not_reached();
}
1723
1724 float16 float16_div(float16 a, float16 b, float_status *status)
1725 {
1726 FloatParts pa = float16_unpack_canonical(a, status);
1727 FloatParts pb = float16_unpack_canonical(b, status);
1728 FloatParts pr = div_floats(pa, pb, status);
1729
1730 return float16_round_pack_canonical(pr, status);
1731 }
1732
1733 static float32 QEMU_SOFTFLOAT_ATTR
1734 soft_f32_div(float32 a, float32 b, float_status *status)
1735 {
1736 FloatParts pa = float32_unpack_canonical(a, status);
1737 FloatParts pb = float32_unpack_canonical(b, status);
1738 FloatParts pr = div_floats(pa, pb, status);
1739
1740 return float32_round_pack_canonical(pr, status);
1741 }
1742
1743 static float64 QEMU_SOFTFLOAT_ATTR
1744 soft_f64_div(float64 a, float64 b, float_status *status)
1745 {
1746 FloatParts pa = float64_unpack_canonical(a, status);
1747 FloatParts pb = float64_unpack_canonical(b, status);
1748 FloatParts pr = div_floats(pa, pb, status);
1749
1750 return float64_round_pack_canonical(pr, status);
1751 }
1752
static float hard_f32_div(float a, float b)
{
    /* Host-FPU single-precision divide for the hardfloat fast path. */
    float quot = a / b;
    return quot;
}
1757
static double hard_f64_div(double a, double b)
{
    /* Host-FPU double-precision divide for the hardfloat fast path. */
    double quot = a / b;
    return quot;
}
1762
1763 static bool f32_div_pre(union_float32 a, union_float32 b)
1764 {
1765 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1766 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1767 fpclassify(b.h) == FP_NORMAL;
1768 }
1769 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1770 }
1771
1772 static bool f64_div_pre(union_float64 a, union_float64 b)
1773 {
1774 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1775 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1776 fpclassify(b.h) == FP_NORMAL;
1777 }
1778 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1779 }
1780
1781 static bool f32_div_post(union_float32 a, union_float32 b)
1782 {
1783 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1784 return fpclassify(a.h) != FP_ZERO;
1785 }
1786 return !float32_is_zero(a.s);
1787 }
1788
1789 static bool f64_div_post(union_float64 a, union_float64 b)
1790 {
1791 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1792 return fpclassify(a.h) != FP_ZERO;
1793 }
1794 return !float64_is_zero(a.s);
1795 }
1796
1797 float32 QEMU_FLATTEN
1798 float32_div(float32 a, float32 b, float_status *s)
1799 {
1800 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1801 f32_div_pre, f32_div_post);
1802 }
1803
1804 float64 QEMU_FLATTEN
1805 float64_div(float64 a, float64 b, float_status *s)
1806 {
1807 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1808 f64_div_pre, f64_div_post);
1809 }
1810
1811 /*
1812 * Float to Float conversions
1813 *
1814 * Returns the result of converting one float format to another. The
1815 * conversion is performed according to the IEC/IEEE Standard for
1816 * Binary Floating-Point Arithmetic.
1817 *
1818 * The float_to_float helper only needs to take care of raising
1819 * invalid exceptions and handling the conversion on NaNs.
1820 */
1821
/*
 * Shared float-to-float conversion core: fixes up NaN/Inf for the
 * destination format `dstf'.  The actual width change happens in the
 * callers' unpack/repack; this only raises Invalid where required and
 * substitutes values for classes the destination cannot represent
 * (the ARM alternative half-precision format has neither NaN nor Inf).
 */
static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
                                 float_status *s)
{
    if (dstf->arm_althp) {
        switch (a.cls) {
        case float_class_qnan:
        case float_class_snan:
            /* There is no NaN in the destination format.  Raise Invalid
             * and return a zero with the sign of the input NaN.
             */
            s->float_exception_flags |= float_flag_invalid;
            a.cls = float_class_zero;
            a.frac = 0;
            a.exp = 0;
            break;

        case float_class_inf:
            /* There is no Inf in the destination format.  Raise Invalid
             * and return the maximum normal with the correct sign.
             */
            s->float_exception_flags |= float_flag_invalid;
            a.cls = float_class_normal;
            a.exp = dstf->exp_max;
            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
            break;

        default:
            /* Zeros and normals convert without fixup. */
            break;
        }
    } else if (is_nan(a.cls)) {
        /* IEEE destination: signaling NaNs raise Invalid and are
         * quietened; optionally replaced by the default NaN.
         */
        if (is_snan(a.cls)) {
            s->float_exception_flags |= float_flag_invalid;
            a = parts_silence_nan(a, s);
        }
        if (s->default_nan_mode) {
            return parts_default_nan(s);
        }
    }
    return a;
}
1862
1863 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1864 {
1865 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1866 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1867 FloatParts pr = float_to_float(p, &float32_params, s);
1868 return float32_round_pack_canonical(pr, s);
1869 }
1870
1871 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1872 {
1873 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1874 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1875 FloatParts pr = float_to_float(p, &float64_params, s);
1876 return float64_round_pack_canonical(pr, s);
1877 }
1878
1879 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1880 {
1881 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1882 FloatParts p = float32_unpack_canonical(a, s);
1883 FloatParts pr = float_to_float(p, fmt16, s);
1884 return float16a_round_pack_canonical(pr, s, fmt16);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_float32_to_float64(float32 a, float_status *s)
1889 {
1890 FloatParts p = float32_unpack_canonical(a, s);
1891 FloatParts pr = float_to_float(p, &float64_params, s);
1892 return float64_round_pack_canonical(pr, s);
1893 }
1894
1895 float64 float32_to_float64(float32 a, float_status *s)
1896 {
1897 if (likely(float32_is_normal(a))) {
1898 /* Widening conversion can never produce inexact results. */
1899 union_float32 uf;
1900 union_float64 ud;
1901 uf.s = a;
1902 ud.h = uf.h;
1903 return ud.s;
1904 } else if (float32_is_zero(a)) {
1905 return float64_set_sign(float64_zero, float32_is_neg(a));
1906 } else {
1907 return soft_float32_to_float64(a, s);
1908 }
1909 }
1910
1911 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1912 {
1913 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1914 FloatParts p = float64_unpack_canonical(a, s);
1915 FloatParts pr = float_to_float(p, fmt16, s);
1916 return float16a_round_pack_canonical(pr, s, fmt16);
1917 }
1918
1919 float32 float64_to_float32(float64 a, float_status *s)
1920 {
1921 FloatParts p = float64_unpack_canonical(a, s);
1922 FloatParts pr = float_to_float(p, &float32_params, s);
1923 return float32_round_pack_canonical(pr, s);
1924 }
1925
1926 /*
1927 * Rounds the floating-point value `a' to an integer, and returns the
1928 * result as a floating-point value. The operation is performed
1929 * according to the IEC/IEEE Standard for Binary Floating-Point
1930 * Arithmetic.
1931 */
1932
/*
 * Round the canonical value `a', scaled by 2**scale, to an integral
 * value in the same (decomposed float) representation, using rounding
 * mode `rmode'.  Inexact is raised whenever fraction bits are dropped.
 */
static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        return return_nan(a, s);

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp the scale so a.exp cannot overflow int range. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional */
            s->float_exception_flags |= float_flag_inexact;
            /* The result is either 0 or 1; decide per rounding mode.
             * For nearest-even, |a| must exceed 0.5 to round to 1
             * (exactly 0.5 ties to the even value 0).
             */
            switch (rmode) {
            case float_round_nearest_even:
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* Partially fractional: compute an increment per the
             * rounding mode and clear the fraction bits below the
             * integer lsb.
             */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Exactly-half cases (pattern == frac_lsbm1 with even
                 * lsb) get no increment, implementing ties-to-even.
                 */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                s->float_exception_flags |= float_flag_inexact;
                a.frac += inc;
                a.frac &= ~rnd_mask;
                /* Rounding may carry out of the top bit; renormalize. */
                if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                    a.frac >>= 1;
                    a.exp++;
                }
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2033
2034 float16 float16_round_to_int(float16 a, float_status *s)
2035 {
2036 FloatParts pa = float16_unpack_canonical(a, s);
2037 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2038 return float16_round_pack_canonical(pr, s);
2039 }
2040
2041 float32 float32_round_to_int(float32 a, float_status *s)
2042 {
2043 FloatParts pa = float32_unpack_canonical(a, s);
2044 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2045 return float32_round_pack_canonical(pr, s);
2046 }
2047
2048 float64 float64_round_to_int(float64 a, float_status *s)
2049 {
2050 FloatParts pa = float64_unpack_canonical(a, s);
2051 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2052 return float64_round_pack_canonical(pr, s);
2053 }
2054
2055 /*
2056 * Returns the result of converting the floating-point value `a' to
2057 * the two's complement integer format. The conversion is performed
2058 * according to the IEC/IEEE Standard for Binary Floating-Point
2059 * Arithmetic---which means in particular that the conversion is
2060 * rounded according to the current rounding mode. If `a' is a NaN,
2061 * the largest positive integer is returned. Otherwise, if the
2062 * conversion overflows, the largest integer with the same sign as `a'
2063 * is returned.
2064 */
2065
/*
 * Round `in' (scaled by 2**scale, mode `rmode') and convert to a
 * signed integer clamped to [min, max].  NaN and out-of-range inputs
 * raise Invalid and return the saturated bound; any Inexact raised by
 * the rounding step is discarded when the conversion is invalid
 * (flags are restored from orig_flags).
 */
static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    int orig_flags = get_float_exception_flags(s);
    FloatParts p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        /* Shift the fraction so the binary point lands at bit 0;
         * anything that cannot fit 64 bits saturates to UINT64_MAX
         * and is caught by the range checks below.
         */
        if (p.exp < DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
        } else {
            r = UINT64_MAX;
        }
        if (p.sign) {
            /* -(uint64_t)min is the magnitude of the most negative
             * representable value, computed without signed overflow.
             */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2111
2112 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2113 float_status *s)
2114 {
2115 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2116 rmode, scale, INT16_MIN, INT16_MAX, s);
2117 }
2118
2119 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2120 float_status *s)
2121 {
2122 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2123 rmode, scale, INT32_MIN, INT32_MAX, s);
2124 }
2125
2126 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2127 float_status *s)
2128 {
2129 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2130 rmode, scale, INT64_MIN, INT64_MAX, s);
2131 }
2132
2133 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2134 float_status *s)
2135 {
2136 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2137 rmode, scale, INT16_MIN, INT16_MAX, s);
2138 }
2139
2140 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2141 float_status *s)
2142 {
2143 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2144 rmode, scale, INT32_MIN, INT32_MAX, s);
2145 }
2146
2147 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2148 float_status *s)
2149 {
2150 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2151 rmode, scale, INT64_MIN, INT64_MAX, s);
2152 }
2153
2154 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2155 float_status *s)
2156 {
2157 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2158 rmode, scale, INT16_MIN, INT16_MAX, s);
2159 }
2160
2161 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2162 float_status *s)
2163 {
2164 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2165 rmode, scale, INT32_MIN, INT32_MAX, s);
2166 }
2167
2168 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2169 float_status *s)
2170 {
2171 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2172 rmode, scale, INT64_MIN, INT64_MAX, s);
2173 }
2174
/*
 * Conversions to signed integers honouring the rounding mode held in
 * the float_status argument.
 */

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

/*
 * Conversions to signed integers that always truncate toward zero,
 * regardless of the rounding mode in the float_status argument.
 */

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2264
2265 /*
2266 * Returns the result of converting the floating-point value `a' to
2267 * the unsigned integer format. The conversion is performed according
2268 * to the IEC/IEEE Standard for Binary Floating-Point
2269 * Arithmetic---which means in particular that the conversion is
2270 * rounded according to the current rounding mode. If `a' is a NaN,
2271 * the largest unsigned integer is returned. Otherwise, if the
2272 * conversion overflows, the largest unsigned integer is returned. If
 * 'a' is negative, the result is rounded and zero is returned;
2274 * values that do not round to zero will raise the inexact exception
2275 * flag.
2276 */
2277
/*
 * Round the decomposed value IN per RMODE/SCALE and saturate it into an
 * unsigned integer in [0, MAX].  NaNs and positive overflows return MAX,
 * negative values that did not round to zero return 0; all of those
 * raise float_flag_invalid and restore orig_flags first, discarding any
 * inexact flag that the rounding step alone may have set.
 */
static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    int orig_flags = get_float_exception_flags(s);
    FloatParts p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        /* Includes negative values that rounded to (minus) zero. */
        return 0;
    case float_class_normal:
        if (p.sign) {
            /* Still negative after rounding: invalid, result is 0. */
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        if (p.exp < DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
            /* At most one bit of headroom above the binary point. */
            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2324
2325 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2326 float_status *s)
2327 {
2328 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2329 rmode, scale, UINT16_MAX, s);
2330 }
2331
2332 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2333 float_status *s)
2334 {
2335 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2336 rmode, scale, UINT32_MAX, s);
2337 }
2338
2339 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2340 float_status *s)
2341 {
2342 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2343 rmode, scale, UINT64_MAX, s);
2344 }
2345
2346 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2347 float_status *s)
2348 {
2349 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2350 rmode, scale, UINT16_MAX, s);
2351 }
2352
2353 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2354 float_status *s)
2355 {
2356 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2357 rmode, scale, UINT32_MAX, s);
2358 }
2359
2360 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2361 float_status *s)
2362 {
2363 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2364 rmode, scale, UINT64_MAX, s);
2365 }
2366
2367 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2368 float_status *s)
2369 {
2370 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2371 rmode, scale, UINT16_MAX, s);
2372 }
2373
2374 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2375 float_status *s)
2376 {
2377 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2378 rmode, scale, UINT32_MAX, s);
2379 }
2380
2381 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2382 float_status *s)
2383 {
2384 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2385 rmode, scale, UINT64_MAX, s);
2386 }
2387
/*
 * Conversions to unsigned integers honouring the rounding mode held in
 * the float_status argument.
 */

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/*
 * Conversions to unsigned integers that always truncate toward zero,
 * regardless of the rounding mode in the float_status argument.
 */

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2477
2478 /*
2479 * Integer to float conversions
2480 *
2481 * Returns the result of converting the two's complement integer `a'
2482 * to the floating-point format. The conversion is performed according
2483 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2484 */
2485
/* Decompose the signed integer `a' * 2^scale into FloatParts form. */
static FloatParts int_to_float(int64_t a, int scale, float_status *status)
{
    FloatParts r = { .sign = false };

    if (a == 0) {
        r.cls = float_class_zero;
    } else {
        uint64_t f = a;
        int shift;

        r.cls = float_class_normal;
        if (a < 0) {
            /* Unsigned negation is well defined even for INT64_MIN. */
            f = -f;
            r.sign = true;
        }
        /* Normalise so the MSB lands on the implicit bit position. */
        shift = clz64(f) - 1;
        /* Clamp as done elsewhere to keep FloatParts.exp in range. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);

        r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
        /* shift < 0 only when f has bit 63 set (a == INT64_MIN); the
         * fraction is then exactly the implicit bit. */
        r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
    }

    return r;
}
2510
2511 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2512 {
2513 FloatParts pa = int_to_float(a, scale, status);
2514 return float16_round_pack_canonical(pa, status);
2515 }
2516
/* Narrower sources and the no-scale variants all funnel through
 * int64_to_float16_scalbn(). */

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2541
2542 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
2543 {
2544 FloatParts pa = int_to_float(a, scale, status);
2545 return float32_round_pack_canonical(pa, status);
2546 }
2547
/* Narrower sources and the no-scale variants all funnel through
 * int64_to_float32_scalbn(). */

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
2572
2573 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
2574 {
2575 FloatParts pa = int_to_float(a, scale, status);
2576 return float64_round_pack_canonical(pa, status);
2577 }
2578
/* Narrower sources and the no-scale variants all funnel through
 * int64_to_float64_scalbn(). */

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
2603
2604
2605 /*
2606 * Unsigned Integer to float conversions
2607 *
2608 * Returns the result of converting the unsigned integer `a' to the
2609 * floating-point format. The conversion is performed according to the
2610 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2611 */
2612
/* Decompose the unsigned integer `a' * 2^scale into FloatParts form. */
static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
{
    FloatParts r = { .sign = false };

    if (a == 0) {
        r.cls = float_class_zero;
    } else {
        /* Clamp as done elsewhere to keep FloatParts.exp in range. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        r.cls = float_class_normal;
        if ((int64_t)a < 0) {
            /* Bit 63 set: cannot shift left, so shift right by one with
             * jamming (shift64RightJamming) and bump the exponent. */
            r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
            shift64RightJamming(a, 1, &a);
            r.frac = a;
        } else {
            /* Normalise so the MSB lands on the implicit bit position. */
            int shift = clz64(a) - 1;
            r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
            r.frac = a << shift;
        }
    }

    return r;
}
2635
2636 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
2637 {
2638 FloatParts pa = uint_to_float(a, scale, status);
2639 return float16_round_pack_canonical(pa, status);
2640 }
2641
/* Narrower sources and the no-scale variants all funnel through
 * uint64_to_float16_scalbn(). */

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
2666
2667 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
2668 {
2669 FloatParts pa = uint_to_float(a, scale, status);
2670 return float32_round_pack_canonical(pa, status);
2671 }
2672
/* Narrower sources and the no-scale variants all funnel through
 * uint64_to_float32_scalbn(). */

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
2697
2698 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
2699 {
2700 FloatParts pa = uint_to_float(a, scale, status);
2701 return float64_round_pack_canonical(pa, status);
2702 }
2703
/* Narrower sources and the no-scale variants all funnel through
 * uint64_to_float64_scalbn(). */

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
2728
2729 /* Float Min/Max */
2730 /* min() and max() functions. These can't be implemented as
2731 * 'compare and pick one input' because that would mishandle
2732 * NaNs and +0 vs -0.
2733 *
2734 * minnum() and maxnum() functions. These are similar to the min()
2735 * and max() functions but if one of the arguments is a QNaN and
2736 * the other is numerical then the numerical argument is returned.
2737 * SNaNs will get quietened before being returned.
2738 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2739 * and maxNum() operations. min() and max() are the typical min/max
2740 * semantics provided by many CPUs which predate that specification.
2741 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
2744 */
/* Shared implementation for all min/max variants; see the comment
 * block above for the ismin/ieee/ismag semantics. */
static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
                                bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return pick_nan(a, b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                /* Quiet NaN vs number: the number wins. */
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
        }
        return pick_nan(a, b, s);
    } else {
        int a_exp, b_exp;

        /* Map each class to an exponent so that zero sorts below any
         * normal and infinity sorts above any normal. */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /* Magnitude compare decides only when the magnitudes differ;
         * equal magnitudes fall through to the signed compare below. */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            /* For negative values the magnitude order is inverted. */
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Differing signs: the negative operand is the smaller;
             * this also orders -0 below +0. */
            return a.sign ^ ismin ? b : a;
        }
    }
}
2815
2816 #define MINMAX(sz, name, ismin, isiee, ismag) \
2817 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2818 float_status *s) \
2819 { \
2820 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2821 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2822 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2823 \
2824 return float ## sz ## _round_pack_canonical(pr, s); \
2825 }
2826
2827 MINMAX(16, min, true, false, false)
2828 MINMAX(16, minnum, true, true, false)
2829 MINMAX(16, minnummag, true, true, true)
2830 MINMAX(16, max, false, false, false)
2831 MINMAX(16, maxnum, false, true, false)
2832 MINMAX(16, maxnummag, false, true, true)
2833
2834 MINMAX(32, min, true, false, false)
2835 MINMAX(32, minnum, true, true, false)
2836 MINMAX(32, minnummag, true, true, true)
2837 MINMAX(32, max, false, false, false)
2838 MINMAX(32, maxnum, false, true, false)
2839 MINMAX(32, maxnummag, false, true, true)
2840
2841 MINMAX(64, min, true, false, false)
2842 MINMAX(64, minnum, true, true, false)
2843 MINMAX(64, minnummag, true, true, true)
2844 MINMAX(64, max, false, false, false)
2845 MINMAX(64, maxnum, false, true, false)
2846 MINMAX(64, maxnummag, false, true, true)
2847
2848 #undef MINMAX
2849
2850 /* Floating point compare */
/* Compare two decomposed values and return a FloatRelation.  A quiet
 * compare raises invalid only for signaling NaNs; a signalling compare
 * (is_quiet == false) raises invalid for any NaN operand. */
static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            s->float_exception_flags |= float_flag_invalid;
        }
        return float_relation_unordered;
    }

    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* Both finite, non-zero, same sign: compare exponents, break ties
     * with the fraction; invert the sense for negative values. */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
2907
/* Expand a softfloat compare helper for float SZ with the given
 * function attribute; each unpacks and defers to compare_floats(). */
#define COMPARE(name, attr, sz) \
static int attr \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
{ \
    FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
    FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
    return compare_floats(pa, pb, is_quiet, s); \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
2922
/* Signalling compare: any NaN operand raises invalid. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
2932
/* Hardfloat fast path for float32 compares; unordered (and the
 * no-hardfloat build) falls back to soft_f32_compare so the exception
 * flags are raised correctly. */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    /* isgreaterequal/isgreater/isless are the C99 quiet comparison
     * macros, so no host FP exception is raised on this path. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
2961
/* Signalling compare: any NaN operand raises invalid. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
2971
/* Hardfloat fast path for float64 compares; unordered (and the
 * no-hardfloat build) falls back to soft_f64_compare so the exception
 * flags are raised correctly. */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    /* isgreaterequal/isgreater/isless are the C99 quiet comparison
     * macros, so no host FP exception is raised on this path. */
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3000
/* Signalling compare: any NaN operand raises invalid. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet compare: only signaling NaNs raise invalid. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3010
3011 /* Multiply A by 2 raised to the power N. */
3012 static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3013 {
3014 if (unlikely(is_nan(a.cls))) {
3015 return return_nan(a, s);
3016 }
3017 if (a.cls == float_class_normal) {
3018 /* The largest float type (even though not supported by FloatParts)
3019 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3020 * still allows rounding to infinity, without allowing overflow
3021 * within the int32_t that backs FloatParts.exp.
3022 */
3023 n = MIN(MAX(n, -0x10000), 0x10000);
3024 a.exp += n;
3025 }
3026 return a;
3027 }
3028
3029 float16 float16_scalbn(float16 a, int n, float_status *status)
3030 {
3031 FloatParts pa = float16_unpack_canonical(a, status);
3032 FloatParts pr = scalbn_decomposed(pa, n, status);
3033 return float16_round_pack_canonical(pr, status);
3034 }
3035
3036 float32 float32_scalbn(float32 a, int n, float_status *status)
3037 {
3038 FloatParts pa = float32_unpack_canonical(a, status);
3039 FloatParts pr = scalbn_decomposed(pa, n, status);
3040 return float32_round_pack_canonical(pr, status);
3041 }
3042
3043 float64 float64_scalbn(float64 a, int n, float_status *status)
3044 {
3045 FloatParts pa = float64_unpack_canonical(a, status);
3046 FloatParts pr = scalbn_decomposed(pa, n, status);
3047 return float64_round_pack_canonical(pr, status);
3048 }
3049
3050 /*
3051 * Square Root
3052 *
3053 * The old softfloat code did an approximation step before zeroing in
3054 * on the final result. However for simpleness we just compute the
3055 * square root by iterating down from the implicit bit to enough extra
3056 * bits to ensure we get a correctly rounded result.
3057 *
3058 * This does mean however the calculation is slower than before,
3059 * especially for 64 bit floats.
3060 */
3061
/* Bit-by-bit square root of the decomposed value A; see the comment
 * block above for the approach.  P supplies the format's precision. */
static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        return return_nan(a, s);
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* Square root of a negative number: invalid, default NaN. */
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top.  Adding room for that is a
     * right shift.  If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift.  Combine
     * those and we shift right if the exponent is even.
     */
    a_frac = a.frac;
    if (!(a.exp & 1)) {
        a_frac >>= 1;
    }
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result.  Remember we've inserted one more bit
     * at the top, so these positions are one less.
     */
    bit = DECOMPOSED_BINARY_POINT - 1;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        /* Accept the bit if the trial partial root stays <= remainder. */
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above.  If there is any remaining
     * fraction, the result is inexact.  Set the sticky bit.
     */
    a.frac = (r_frac << 1) + (a_frac != 0);

    return a;
}
3122
3123 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3124 {
3125 FloatParts pa = float16_unpack_canonical(a, status);
3126 FloatParts pr = sqrt_float(pa, status, &float16_params);
3127 return float16_round_pack_canonical(pr, status);
3128 }
3129
3130 static float32 QEMU_SOFTFLOAT_ATTR
3131 soft_f32_sqrt(float32 a, float_status *status)
3132 {
3133 FloatParts pa = float32_unpack_canonical(a, status);
3134 FloatParts pr = sqrt_float(pa, status, &float32_params);
3135 return float32_round_pack_canonical(pr, status);
3136 }
3137
3138 static float64 QEMU_SOFTFLOAT_ATTR
3139 soft_f64_sqrt(float64 a, float_status *status)
3140 {
3141 FloatParts pa = float64_unpack_canonical(a, status);
3142 FloatParts pr = sqrt_float(pa, status, &float64_params);
3143 return float64_round_pack_canonical(pr, status);
3144 }
3145
3146 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3147 {
3148 union_float32 ua, ur;
3149
3150 ua.s = xa;
3151 if (unlikely(!can_use_fpu(s))) {
3152 goto soft;
3153 }
3154
3155 float32_input_flush1(&ua.s, s);
3156 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3157 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3158 fpclassify(ua.h) == FP_ZERO) ||
3159 signbit(ua.h))) {
3160 goto soft;
3161 }
3162 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3163 float32_is_neg(ua.s))) {
3164 goto soft;
3165 }
3166 ur.h = sqrtf(ua.h);
3167 return ur.s;
3168
3169 soft:
3170 return soft_f32_sqrt(ua.s, s);
3171 }
3172
3173 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3174 {
3175 union_float64 ua, ur;
3176
3177 ua.s = xa;
3178 if (unlikely(!can_use_fpu(s))) {
3179 goto soft;
3180 }
3181
3182 float64_input_flush1(&ua.s, s);
3183 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3184 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3185 fpclassify(ua.h) == FP_ZERO) ||
3186 signbit(ua.h))) {
3187 goto soft;
3188 }
3189 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3190 float64_is_neg(ua.s))) {
3191 goto soft;
3192 }
3193 ur.h = sqrt(ua.h);
3194 return ur.s;
3195
3196 soft:
3197 return soft_f64_sqrt(ua.s, s);
3198 }
3199
3200 /*----------------------------------------------------------------------------
3201 | The pattern for a default generated NaN.
3202 *----------------------------------------------------------------------------*/
3203
3204 float16 float16_default_nan(float_status *status)
3205 {
3206 FloatParts p = parts_default_nan(status);
3207 p.frac >>= float16_params.frac_shift;
3208 return float16_pack_raw(p);
3209 }
3210
3211 float32 float32_default_nan(float_status *status)
3212 {
3213 FloatParts p = parts_default_nan(status);
3214 p.frac >>= float32_params.frac_shift;
3215 return float32_pack_raw(p);
3216 }
3217
3218 float64 float64_default_nan(float_status *status)
3219 {
3220 FloatParts p = parts_default_nan(status);
3221 p.frac >>= float64_params.frac_shift;
3222 return float64_pack_raw(p);
3223 }
3224
3225 float128 float128_default_nan(float_status *status)
3226 {
3227 FloatParts p = parts_default_nan(status);
3228 float128 r;
3229
3230 /* Extrapolate from the choices made by parts_default_nan to fill
3231 * in the quad-floating format. If the low bit is set, assume we
3232 * want to set all non-snan bits.
3233 */
3234 r.low = -(p.frac & 1);
3235 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3236 r.high |= UINT64_C(0x7FFF000000000000);
3237 r.high |= (uint64_t)p.sign << 63;
3238
3239 return r;
3240 }
3241
3242 /*----------------------------------------------------------------------------
3243 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3244 *----------------------------------------------------------------------------*/
3245
3246 float16 float16_silence_nan(float16 a, float_status *status)
3247 {
3248 FloatParts p = float16_unpack_raw(a);
3249 p.frac <<= float16_params.frac_shift;
3250 p = parts_silence_nan(p, status);
3251 p.frac >>= float16_params.frac_shift;
3252 return float16_pack_raw(p);
3253 }
3254
3255 float32 float32_silence_nan(float32 a, float_status *status)
3256 {
3257 FloatParts p = float32_unpack_raw(a);
3258 p.frac <<= float32_params.frac_shift;
3259 p = parts_silence_nan(p, status);
3260 p.frac >>= float32_params.frac_shift;
3261 return float32_pack_raw(p);
3262 }
3263
3264 float64 float64_silence_nan(float64 a, float_status *status)
3265 {
3266 FloatParts p = float64_unpack_raw(a);
3267 p.frac <<= float64_params.frac_shift;
3268 p = parts_silence_nan(p, status);
3269 p.frac >>= float64_params.frac_shift;
3270 return float64_pack_raw(p);
3271 }
3272
3273
3274 /*----------------------------------------------------------------------------
3275 | If `a' is denormal and we are in flush-to-zero mode then set the
3276 | input-denormal exception and return zero. Otherwise just return the value.
3277 *----------------------------------------------------------------------------*/
3278
3279 static bool parts_squash_denormal(FloatParts p, float_status *status)
3280 {
3281 if (p.exp == 0 && p.frac != 0) {
3282 float_raise(float_flag_input_denormal, status);
3283 return true;
3284 }
3285
3286 return false;
3287 }
3288
3289 float16 float16_squash_input_denormal(float16 a, float_status *status)
3290 {
3291 if (status->flush_inputs_to_zero) {
3292 FloatParts p = float16_unpack_raw(a);
3293 if (parts_squash_denormal(p, status)) {
3294 return float16_set_sign(float16_zero, p.sign);
3295 }
3296 }
3297 return a;
3298 }
3299
3300 float32 float32_squash_input_denormal(float32 a, float_status *status)
3301 {
3302 if (status->flush_inputs_to_zero) {
3303 FloatParts p = float32_unpack_raw(a);
3304 if (parts_squash_denormal(p, status)) {
3305 return float32_set_sign(float32_zero, p.sign);
3306 }
3307 }
3308 return a;
3309 }
3310
3311 float64 float64_squash_input_denormal(float64 a, float_status *status)
3312 {
3313 if (status->flush_inputs_to_zero) {
3314 FloatParts p = float64_unpack_raw(a);
3315 if (parts_squash_denormal(p, status)) {
3316 return float64_set_sign(float64_zero, p.sign);
3317 }
3318 }
3319 return a;
3320 }
3321
3322 /*----------------------------------------------------------------------------
3323 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3324 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3325 | input. If `zSign' is 1, the input is negated before being converted to an
3326 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3327 | is simply rounded to an integer, with the inexact exception raised if the
3328 | input cannot be represented exactly as an integer. However, if the fixed-
3329 | point input is too large, the invalid exception is raised and the largest
3330 | positive or negative integer is returned.
3331 *----------------------------------------------------------------------------*/
3332
3333 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3334 float_status *status)
3335 {
3336 int8_t roundingMode;
3337 bool roundNearestEven;
3338 int8_t roundIncrement, roundBits;
3339 int32_t z;
3340
3341 roundingMode = status->float_rounding_mode;
3342 roundNearestEven = ( roundingMode == float_round_nearest_even );
3343 switch (roundingMode) {
3344 case float_round_nearest_even:
3345 case float_round_ties_away:
3346 roundIncrement = 0x40;
3347 break;
3348 case float_round_to_zero:
3349 roundIncrement = 0;
3350 break;
3351 case float_round_up:
3352 roundIncrement = zSign ? 0 : 0x7f;
3353 break;
3354 case float_round_down:
3355 roundIncrement = zSign ? 0x7f : 0;
3356 break;
3357 case float_round_to_odd:
3358 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3359 break;
3360 default:
3361 abort();
3362 }
3363 roundBits = absZ & 0x7F;
3364 absZ = ( absZ + roundIncrement )>>7;
3365 if (!(roundBits ^ 0x40) && roundNearestEven) {
3366 absZ &= ~1;
3367 }
3368 z = absZ;
3369 if ( zSign ) z = - z;
3370 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
3371 float_raise(float_flag_invalid, status);
3372 return zSign ? INT32_MIN : INT32_MAX;
3373 }
3374 if (roundBits) {
3375 status->float_exception_flags |= float_flag_inexact;
3376 }
3377 return z;
3378
3379 }
3380
3381 /*----------------------------------------------------------------------------
3382 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3383 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3384 | and returns the properly rounded 64-bit integer corresponding to the input.
3385 | If `zSign' is 1, the input is negated before being converted to an integer.
3386 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3387 | the inexact exception raised if the input cannot be represented exactly as
3388 | an integer. However, if the fixed-point input is too large, the invalid
3389 | exception is raised and the largest positive or negative integer is