Merge remote-tracking branch 'remotes/alistair/tags/pull-register-20200927' into...
[qemu.git] / fpu / softfloat.c
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Input-flush helper generators.
 *
 * Each generated function replaces a denormal input with a same-signed
 * zero and raises float_flag_input_denormal.  The __NOCHECK flavor
 * assumes the caller already verified that flush_inputs_to_zero is set;
 * the FLUSH1/2/3 flavors check the flag once and then flush 1, 2 or 3
 * operands respectively.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            s->float_exception_flags |= float_flag_input_denormal;      \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

/* Flush a single operand, checking flush_inputs_to_zero first. */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

/* Flush two operands with a single flush_inputs_to_zero check. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

/* Flush three operands with a single flush_inputs_to_zero check. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
/* NOTE(review): these values look benchmark-tuned per host; re-measure
 * before changing them. */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* With hardfloat enabled, keep the softfloat fallbacks out of line. */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

/* Type-pun between the softfloat bit container (s) and the host FP view (h).
 * Union-based punning keeps this free of strict-aliasing violations. */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates applied before/after the native op (the pre/post checks). */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat fallback and native host implementations of a 2-input op. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
/*
 * Generic 2-input float32 op dispatcher.
 *
 * Tries the host-FPU fast path: flushes denormal inputs (if the status
 * asks for it), runs the caller-supplied `pre' check, then executes the
 * native op `hard'.  An infinite result raises the overflow flag; a
 * result with magnitude <= FLT_MIN where `post' agrees is redone in
 * softfloat so underflow/inexact come out exactly right.  Any failed
 * check falls through to the softfloat implementation `soft'.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush first so pre() and the native op see the flushed values. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Possibly tiny result: recompute in softfloat for exact flags. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372
/*
 * Generic 2-input float64 op dispatcher; float64 twin of float32_gen2.
 * Fast path via the host FPU when can_use_fpu() and `pre' allow it;
 * infinite results raise overflow, possibly-tiny results (<= DBL_MIN
 * with `post' agreeing) are redone in softfloat for exact flags.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush first so pre() and the native op see the flushed values. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        s->float_exception_flags |= float_flag_overflow;
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Possibly tiny result: recompute in softfloat for exact flags. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

/* Packed so FloatParts stays small enough to pass in registers. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified, /* raw unpack result, not yet canonicalized */
    float_class_zero,
    float_class_normal,       /* denormals become normal after canonicalize */
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
472 /* Simple helpers for checking if, or what kind of, NaN we have */
473 static inline __attribute__((unused)) bool is_nan(FloatClass c)
474 {
475 return unlikely(c >= float_class_qnan);
476 }
477
478 static inline __attribute__((unused)) bool is_snan(FloatClass c)
479 {
480 return c == float_class_snan;
481 }
482
483 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
484 {
485 return c == float_class_qnan;
486 }
487
/*
 * Structure holding all of the decomposed parts of a float. The
 * exponent is unbiased and the fraction is normalized. All
 * calculations are done with a 64 bit fraction and then rounded as
 * appropriate for the final format.
 *
 * Thanks to the packed FloatClass a decent compiler should be able to
 * fit the whole structure into registers and avoid using the stack
 * for parameter passing.
 */

typedef struct {
    uint64_t frac;   /* fraction; for normals the implicit bit sits at
                      * DECOMPOSED_BINARY_POINT */
    int32_t exp;     /* unbiased exponent */
    FloatClass cls;  /* classification, see FloatClass */
    bool sign;
} FloatParts;

/* Bit position of the (made explicit) leading fraction bit; two spare
 * top bits leave room for rounding carry-out detection. */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT    (DECOMPOSED_IMPLICIT_BIT << 1)
509
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
/* Designated initializers: usable inside a braced FloatFmt initializer,
 * optionally followed by modifier fields such as .arm_althp. */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
547
/* IEEE binary16: 5-bit exponent, 10-bit fraction. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* ARM alternative half precision: same layout, no Inf/NaN encodings. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: float32 exponent range (8 bits) with a 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32: 8-bit exponent, 23-bit fraction. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64: 11-bit exponent, 52-bit fraction. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
568
569 /* Unpack a float to parts, but do not canonicalize. */
570 static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
571 {
572 const int sign_pos = fmt.frac_size + fmt.exp_size;
573
574 return (FloatParts) {
575 .cls = float_class_unclassified,
576 .sign = extract64(raw, sign_pos, 1),
577 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
578 .frac = extract64(raw, 0, fmt.frac_size),
579 };
580 }
581
/* Per-format raw unpack: bind unpack_raw to each format's parameters. */
static inline FloatParts float16_unpack_raw(float16 f)
{
    return unpack_raw(float16_params, f);
}

static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
{
    return unpack_raw(bfloat16_params, f);
}

static inline FloatParts float32_unpack_raw(float32 f)
{
    return unpack_raw(float32_params, f);
}

static inline FloatParts float64_unpack_raw(float64 f)
{
    return unpack_raw(float64_params, f);
}
601
602 /* Pack a float from parts, but do not canonicalize. */
603 static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
604 {
605 const int sign_pos = fmt.frac_size + fmt.exp_size;
606 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
607 return deposit64(ret, sign_pos, 1, p.sign);
608 }
609
/* Per-format raw pack: bind pack_raw to each format's parameters. */
static inline float16 float16_pack_raw(FloatParts p)
{
    return make_float16(pack_raw(float16_params, p));
}

static inline bfloat16 bfloat16_pack_raw(FloatParts p)
{
    return pack_raw(bfloat16_params, p);
}

static inline float32 float32_pack_raw(FloatParts p)
{
    return make_float32(pack_raw(float32_params, p));
}

static inline float64 float64_pack_raw(FloatParts p)
{
    return make_float64(pack_raw(float64_params, p));
}
629
630 /*----------------------------------------------------------------------------
631 | Functions and definitions to determine: (1) whether tininess for underflow
632 | is detected before or after rounding by default, (2) what (if anything)
633 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
634 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
635 | are propagated from function inputs to output. These details are target-
636 | specific.
637 *----------------------------------------------------------------------------*/
638 #include "softfloat-specialize.c.inc"
639
/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
                                  float_status *status)
{
    /* Maximum exponent encodes Inf or NaN (except in ARM alt-HP,
     * which has no Inf/NaN encodings). */
    if (part.exp == parm->exp_max && !parm->arm_althp) {
        if (part.frac == 0) {
            part.cls = float_class_inf;
        } else {
            part.frac <<= parm->frac_shift;
            part.cls = (parts_is_snan_frac(part.frac, status)
                        ? float_class_snan : float_class_qnan);
        }
    } else if (part.exp == 0) {
        /* Zero exponent: true zero, flushed denormal, or normalized
         * denormal depending on the fraction and status flags. */
        if (likely(part.frac == 0)) {
            part.cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal, status);
            part.cls = float_class_zero;
            part.frac = 0;
        } else {
            /* Normalize the denormal: shift the msb up to the
             * implicit-bit position and adjust the exponent to match. */
            int shift = clz64(part.frac) - 1;
            part.cls = float_class_normal;
            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
            part.frac <<= shift;
        }
    } else {
        /* Normal number: unbias the exponent and make the implicit
         * leading bit explicit at DECOMPOSED_BINARY_POINT. */
        part.cls = float_class_normal;
        part.exp -= parm->exp_bias;
        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
    }
    return part;
}
672
/* Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */

static FloatParts round_canonical(FloatParts p, float_status *s,
                                  const FloatFmt *parm)
{
    const uint64_t frac_lsb = parm->frac_lsb;
    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
    const uint64_t round_mask = parm->round_mask;
    const uint64_t roundeven_mask = parm->roundeven_mask;
    const int exp_max = parm->exp_max;
    const int frac_shift = parm->frac_shift;
    uint64_t frac, inc;
    int exp, flags = 0;
    bool overflow_norm;

    frac = p.frac;
    exp = p.exp;

    switch (p.cls) {
    case float_class_normal:
        /* Pick the rounding increment `inc' and whether overflow should
         * produce the largest normal (overflow_norm) rather than Inf. */
        switch (s->float_rounding_mode) {
        case float_round_nearest_even:
            overflow_norm = false;
            /* Ties-to-even: no increment exactly on a tie to an even lsb. */
            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
            break;
        case float_round_ties_away:
            overflow_norm = false;
            inc = frac_lsbm1;
            break;
        case float_round_to_zero:
            overflow_norm = true;
            inc = 0;
            break;
        case float_round_up:
            inc = p.sign ? 0 : round_mask;
            overflow_norm = p.sign;
            break;
        case float_round_down:
            inc = p.sign ? round_mask : 0;
            overflow_norm = !p.sign;
            break;
        case float_round_to_odd:
            overflow_norm = true;
            inc = frac & frac_lsb ? 0 : round_mask;
            break;
        default:
            g_assert_not_reached();
        }

        exp += parm->exp_bias;
        if (likely(exp > 0)) {
            /* Normal result: round, renormalize on carry-out, then drop
             * the rounding bits. */
            if (frac & round_mask) {
                flags |= float_flag_inexact;
                frac += inc;
                if (frac & DECOMPOSED_OVERFLOW_BIT) {
                    frac >>= 1;
                    exp++;
                }
            }
            frac >>= frac_shift;

            if (parm->arm_althp) {
                /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
                if (unlikely(exp > exp_max)) {
                    /* Overflow. Return the maximum normal. */
                    flags = float_flag_invalid;
                    exp = exp_max;
                    frac = -1;
                }
            } else if (unlikely(exp >= exp_max)) {
                flags |= float_flag_overflow | float_flag_inexact;
                if (overflow_norm) {
                    /* Round mode saturates to the largest finite value. */
                    exp = exp_max - 1;
                    frac = -1;
                } else {
                    p.cls = float_class_inf;
                    goto do_inf;
                }
            }
        } else if (s->flush_to_zero) {
            flags |= float_flag_output_denormal;
            p.cls = float_class_zero;
            goto do_zero;
        } else {
            /* Subnormal path.  Tininess is judged before the fraction is
             * shifted/rounded when tininess_before_rounding is set, or
             * when rounding cannot carry back up to a normal. */
            bool is_tiny = s->tininess_before_rounding
                        || (exp < 0)
                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);

            shift64RightJamming(frac, 1 - exp, &frac);
            if (frac & round_mask) {
                /* Need to recompute round-to-even. */
                switch (s->float_rounding_mode) {
                case float_round_nearest_even:
                    inc = ((frac & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                    break;
                case float_round_to_odd:
                    inc = frac & frac_lsb ? 0 : round_mask;
                    break;
                default:
                    break;
                }
                flags |= float_flag_inexact;
                frac += inc;
            }

            /* Rounding may have produced the smallest normal. */
            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
            frac >>= frac_shift;

            if (is_tiny && (flags & float_flag_inexact)) {
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac == 0) {
                p.cls = float_class_zero;
            }
        }
        break;

    case float_class_zero:
    do_zero:
        exp = 0;
        frac = 0;
        break;

    case float_class_inf:
    do_inf:
        assert(!parm->arm_althp);
        exp = exp_max;
        frac = 0;
        break;

    case float_class_qnan:
    case float_class_snan:
        assert(!parm->arm_althp);
        exp = exp_max;
        /* Keep the NaN payload, narrowed to the target fraction width. */
        frac >>= parm->frac_shift;
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    p.exp = exp;
    p.frac = frac;
    return p;
}
824
/* Explicit FloatFmt version */
/* Per-format canonical unpack/round-pack wrappers: compose the raw
 * (un)packers with sf_canonicalize / round_canonical. */
static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
                                            const FloatFmt *params)
{
    return sf_canonicalize(float16_unpack_raw(f), params, s);
}

static FloatParts float16_unpack_canonical(float16 f, float_status *s)
{
    return float16a_unpack_canonical(f, s, &float16_params);
}

static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
{
    return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
}

static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
                                             const FloatFmt *params)
{
    return float16_pack_raw(round_canonical(p, s, params));
}

static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
{
    return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
}

static FloatParts float32_unpack_canonical(float32 f, float_status *s)
{
    return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
}

static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
{
    return float32_pack_raw(round_canonical(p, s, &float32_params));
}

static FloatParts float64_unpack_canonical(float64 f, float_status *s)
{
    return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
}

static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
{
    return float64_pack_raw(round_canonical(p, s, &float64_params));
}
877
/*
 * Propagate a single NaN input: silence an sNaN (raising invalid) and
 * substitute the default NaN when the status requests it.  Only valid
 * for NaN-classed inputs.
 */
static FloatParts return_nan(FloatParts a, float_status *s)
{
    switch (a.cls) {
    case float_class_snan:
        s->float_exception_flags |= float_flag_invalid;
        a = parts_silence_nan(a, s);
        /* fall through */
    case float_class_qnan:
        if (s->default_nan_mode) {
            return parts_default_nan(s);
        }
        break;

    default:
        g_assert_not_reached();
    }
    return a;
}
896
/*
 * Pick which of two operands' NaNs to propagate (target-specific via
 * pickNaN), raising invalid if either is signalling, and silencing the
 * chosen NaN if needed.  At least one operand must be a NaN.
 */
static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
{
    if (is_snan(a.cls) || is_snan(b.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    if (s->default_nan_mode) {
        return parts_default_nan(s);
    } else {
        /* The third argument tells pickNaN whether `a' has the larger
         * significand (payload), with sign breaking exact ties. */
        if (pickNaN(a.cls, b.cls,
                    a.frac > b.frac ||
                    (a.frac == b.frac && a.sign < b.sign), s)) {
            a = b;
        }
        if (is_snan(a.cls)) {
            return parts_silence_nan(a, s);
        }
    }
    return a;
}
917
/*
 * Three-operand NaN selection for fused multiply-add.  `inf_zero'
 * indicates the invalid inf*0 case; the target hook pickNaNMulAdd
 * returns which operand's NaN to propagate (0=a, 1=b, 2=c, 3=default).
 */
static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
                                  bool inf_zero, float_status *s)
{
    int which;

    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);

    if (s->default_nan_mode) {
        /* Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag.
         */
        which = 3;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    case 3:
        return parts_default_nan(s);
    default:
        g_assert_not_reached();
    }

    if (is_snan(a.cls)) {
        return parts_silence_nan(a, s);
    }
    return a;
}
956
/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */

static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
                                float_status *s)
{
    bool a_sign = a.sign;
    /* Folding the subtract flag into b's sign reduces both ops to one
     * effective-addition / effective-subtraction case analysis. */
    bool b_sign = b.sign ^ subtract;

    if (a_sign != b_sign) {
        /* Subtraction */

        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Align the smaller-magnitude operand (jamming shifted-out
             * bits) and subtract it from the larger. */
            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
                a.frac = a.frac - b.frac;
            } else {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.frac = b.frac - a.frac;
                a.exp = b.exp;
                /* Result takes the sign of the larger magnitude. */
                a_sign ^= 1;
            }

            if (a.frac == 0) {
                a.cls = float_class_zero;
                /* Exact cancellation yields -0 only in round-down mode. */
                a.sign = s->float_rounding_mode == float_round_down;
            } else {
                /* Renormalize after cancellation. */
                int shift = clz64(a.frac) - 1;
                a.frac = a.frac << shift;
                a.exp = a.exp - shift;
                a.sign = a_sign;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf) {
            if (b.cls == float_class_inf) {
                /* inf - inf is invalid. */
                float_raise(float_flag_invalid, s);
                return parts_default_nan(s);
            }
            return a;
        }
        if (a.cls == float_class_zero && b.cls == float_class_zero) {
            a.sign = s->float_rounding_mode == float_round_down;
            return a;
        }
        if (a.cls == float_class_zero || b.cls == float_class_inf) {
            /* Result is -b in the effective subtraction. */
            b.sign = a_sign ^ 1;
            return b;
        }
        if (b.cls == float_class_zero) {
            return a;
        }
    } else {
        /* Addition */
        if (a.cls == float_class_normal && b.cls == float_class_normal) {
            /* Align exponents (jamming shifted-out bits), then add. */
            if (a.exp > b.exp) {
                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
            } else if (a.exp < b.exp) {
                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
                a.exp = b.exp;
            }
            a.frac += b.frac;
            if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                /* Carry-out: renormalize one position. */
                shift64RightJamming(a.frac, 1, &a.frac);
                a.exp += 1;
            }
            return a;
        }
        if (is_nan(a.cls) || is_nan(b.cls)) {
            return pick_nan(a, b, s);
        }
        if (a.cls == float_class_inf || b.cls == float_class_zero) {
            return a;
        }
        if (b.cls == float_class_inf || a.cls == float_class_zero) {
            b.sign = b_sign;
            return b;
        }
    }
    g_assert_not_reached();
}
1045
1046 /*
1047 * Returns the result of adding or subtracting the floating-point
1048 * values `a' and `b'. The operation is performed according to the
1049 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1050 */
1051
1052 float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
1053 {
1054 FloatParts pa = float16_unpack_canonical(a, status);
1055 FloatParts pb = float16_unpack_canonical(b, status);
1056 FloatParts pr = addsub_floats(pa, pb, false, status);
1057
1058 return float16_round_pack_canonical(pr, status);
1059 }
1060
1061 float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1062 {
1063 FloatParts pa = float16_unpack_canonical(a, status);
1064 FloatParts pb = float16_unpack_canonical(b, status);
1065 FloatParts pr = addsub_floats(pa, pb, true, status);
1066
1067 return float16_round_pack_canonical(pr, status);
1068 }
1069
1070 static float32 QEMU_SOFTFLOAT_ATTR
1071 soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
1072 {
1073 FloatParts pa = float32_unpack_canonical(a, status);
1074 FloatParts pb = float32_unpack_canonical(b, status);
1075 FloatParts pr = addsub_floats(pa, pb, subtract, status);
1076
1077 return float32_round_pack_canonical(pr, status);
1078 }
1079
/* Softfloat fallback for single-precision addition. */
static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, false, status);
}
1084
/* Softfloat fallback for single-precision subtraction. */
static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, true, status);
}
1089
1090 static float64 QEMU_SOFTFLOAT_ATTR
1091 soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
1092 {
1093 FloatParts pa = float64_unpack_canonical(a, status);
1094 FloatParts pb = float64_unpack_canonical(b, status);
1095 FloatParts pr = addsub_floats(pa, pb, subtract, status);
1096
1097 return float64_round_pack_canonical(pr, status);
1098 }
1099
/* Softfloat fallback for double-precision addition. */
static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, false, status);
}
1104
/* Softfloat fallback for double-precision subtraction. */
static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, true, status);
}
1109
static float hard_f32_add(float a, float b)
{
    /* Single-precision addition on the host FPU. */
    const float sum = a + b;

    return sum;
}
1114
static float hard_f32_sub(float a, float b)
{
    /* Single-precision subtraction on the host FPU. */
    const float diff = a - b;

    return diff;
}
1119
static double hard_f64_add(double a, double b)
{
    /* Double-precision addition on the host FPU. */
    const double sum = a + b;

    return sum;
}
1124
static double hard_f64_sub(double a, double b)
{
    /* Double-precision subtraction on the host FPU. */
    const double diff = a - b;

    return diff;
}
1129
1130 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1131 {
1132 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1133 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1134 }
1135 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1136 }
1137
1138 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1139 {
1140 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1141 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1142 } else {
1143 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1144 }
1145 }
1146
/* Dispatch an f32 add/sub between the host-FPU fast path and softfloat. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1153
/* Dispatch an f64 add/sub between the host-FPU fast path and softfloat. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1160
/* Public f32 addition entry point. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
1166
/* Public f32 subtraction entry point. */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
1172
/* Public f64 addition entry point. */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
1178
/* Public f64 subtraction entry point. */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1184
1185 /*
1186 * Returns the result of adding or subtracting the bfloat16
1187 * values `a' and `b'.
1188 */
1189 bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1190 {
1191 FloatParts pa = bfloat16_unpack_canonical(a, status);
1192 FloatParts pb = bfloat16_unpack_canonical(b, status);
1193 FloatParts pr = addsub_floats(pa, pb, false, status);
1194
1195 return bfloat16_round_pack_canonical(pr, status);
1196 }
1197
1198 bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1199 {
1200 FloatParts pa = bfloat16_unpack_canonical(a, status);
1201 FloatParts pb = bfloat16_unpack_canonical(b, status);
1202 FloatParts pr = addsub_floats(pa, pb, true, status);
1203
1204 return bfloat16_round_pack_canonical(pr, status);
1205 }
1206
1207 /*
1208 * Returns the result of multiplying the floating-point values `a' and
1209 * `b'. The operation is performed according to the IEC/IEEE Standard
1210 * for Binary Floating-Point Arithmetic.
1211 */
1212
static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
{
    /* The product's sign is the XOR of the operand signs in every class. */
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t hi, lo;
        int exp = a.exp + b.exp;

        /* 64x64 -> 128-bit multiply, then move the binary point back
         * to its canonical position, renormalizing on carry-out.
         */
        mul64To128(a.frac, b.frac, &hi, &lo);
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
        if (lo & DECOMPOSED_OVERFLOW_BIT) {
            shift64RightJamming(lo, 1, &lo);
            exp += 1;
        }

        /* Re-use a */
        a.exp = exp;
        a.sign = sign;
        a.frac = lo;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* Inf * Zero == NaN */
    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
        (a.cls == float_class_zero && b.cls == float_class_inf)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Multiply by 0 or Inf */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    if (b.cls == float_class_inf || b.cls == float_class_zero) {
        b.sign = sign;
        return b;
    }
    g_assert_not_reached();
}
1255
1256 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1257 {
1258 FloatParts pa = float16_unpack_canonical(a, status);
1259 FloatParts pb = float16_unpack_canonical(b, status);
1260 FloatParts pr = mul_floats(pa, pb, status);
1261
1262 return float16_round_pack_canonical(pr, status);
1263 }
1264
1265 static float32 QEMU_SOFTFLOAT_ATTR
1266 soft_f32_mul(float32 a, float32 b, float_status *status)
1267 {
1268 FloatParts pa = float32_unpack_canonical(a, status);
1269 FloatParts pb = float32_unpack_canonical(b, status);
1270 FloatParts pr = mul_floats(pa, pb, status);
1271
1272 return float32_round_pack_canonical(pr, status);
1273 }
1274
1275 static float64 QEMU_SOFTFLOAT_ATTR
1276 soft_f64_mul(float64 a, float64 b, float_status *status)
1277 {
1278 FloatParts pa = float64_unpack_canonical(a, status);
1279 FloatParts pb = float64_unpack_canonical(b, status);
1280 FloatParts pr = mul_floats(pa, pb, status);
1281
1282 return float64_round_pack_canonical(pr, status);
1283 }
1284
static float hard_f32_mul(float a, float b)
{
    /* Single-precision multiplication on the host FPU. */
    const float product = a * b;

    return product;
}
1289
static double hard_f64_mul(double a, double b)
{
    /* Double-precision multiplication on the host FPU. */
    const double product = a * b;

    return product;
}
1294
/* Public f32 multiply: host-FPU fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
1301
/* Public f64 multiply: host-FPU fast path with softfloat fallback. */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1308
1309 /*
1310 * Returns the result of multiplying the bfloat16
1311 * values `a' and `b'.
1312 */
1313
1314 bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1315 {
1316 FloatParts pa = bfloat16_unpack_canonical(a, status);
1317 FloatParts pb = bfloat16_unpack_canonical(b, status);
1318 FloatParts pr = mul_floats(pa, pb, status);
1319
1320 return bfloat16_round_pack_canonical(pr, status);
1321 }
1322
1323 /*
1324 * Returns the result of multiplying the floating-point values `a' and
1325 * `b' then adding 'c', with no intermediate rounding step after the
1326 * multiplication. The operation is performed according to the
1327 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1328 * The flags argument allows the caller to select negation of the
1329 * addend, the intermediate product, or the final result. (The
1330 * difference between this and having the caller do a separate
1331 * negation is that negating externally will flip the sign bit on
1332 * NaNs.)
1333 */
1334
static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
                                int flags, float_status *s)
{
    /* True when one of a/b is Inf and the other is zero, in either
     * order: the invalid Inf * 0 multiply combination.
     */
    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
                    ((1 << float_class_inf) | (1 << float_class_zero));
    bool p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
        return pick_nan_muladd(a, b, c, inf_zero, s);
    }

    /* Inf * 0 with no NaN operand: Invalid, default NaN. */
    if (inf_zero) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    /* Sign of the (possibly negated) product a * b. */
    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    /* Classify the product; NaNs and Inf * 0 were handled above. */
    if (a.cls == float_class_inf || b.cls == float_class_inf) {
        p_class = float_class_inf;
    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        if (p_class == float_class_inf && p_sign != c.sign) {
            /* Inf - Inf: Invalid. */
            s->float_exception_flags |= float_flag_invalid;
            return parts_default_nan(s);
        } else {
            /* An infinite addend dominates a finite or like-signed
             * infinite product.
             */
            a.cls = float_class_inf;
            a.sign = c.sign ^ sign_flip;
            return a;
        }
    }

    if (p_class == float_class_inf) {
        /* Infinite product plus finite addend. */
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        if (c.cls == float_class_zero) {
            if (p_sign != c.sign) {
                /* (+0) + (-0): sign depends on the rounding direction. */
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
     * result.
     */
    mul64To128(a.frac, b.frac, &hi, &lo);
    /* binary point now at bit 124 */

    /* check for overflow */
    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
        shift128RightJamming(hi, lo, 1, &hi, &lo);
        p_exp += 1;
    }

    /* + add/sub */
    if (c.cls == float_class_zero) {
        /* move binary point back to 62 */
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
    } else {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                /* c is at least as large: shift the product down onto
                 * c's scale and add c's fraction directly.
                 */
                shift128RightJamming(hi, lo,
                                     DECOMPOSED_BINARY_POINT - exp_diff,
                                     &hi, &lo);
                lo += c.frac;
                p_exp = c.exp;
            } else {
                uint64_t c_hi, c_lo;
                /* shift c to the same binary point as the product (124) */
                c_hi = c.frac >> 2;
                c_lo = 0;
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                add128(hi, lo, c_hi, c_lo, &hi, &lo);
                /* move binary point back to 62 */
                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
            }

            if (lo & DECOMPOSED_OVERFLOW_BIT) {
                /* Carry out of the sum: renormalize by one bit. */
                shift64RightJamming(lo, 1, &lo);
                p_exp += 1;
            }

        } else {
            /* Subtraction */
            uint64_t c_hi, c_lo;
            /* make C binary point match product at bit 124 */
            c_hi = c.frac >> 2;
            c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    /* c dominates: the difference takes c's sign. */
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                /* Exact cancellation: zero, sign per rounding mode. */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent.  However since we're
                   shifting, we might as well put the binary point back
                   at 62 where we really want it.  Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits. */
                shift -= 1;
                if (shift >= 64) {
                    lo = lo << (shift - 64);
                } else {
                    hi = (hi << shift) | (lo >> (64 - shift));
                    lo = hi | ((lo << shift) != 0);
                }
                p_exp -= shift - 2;
            }
        }
    }

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = lo;

    return a;
}
1525
1526 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1527 int flags, float_status *status)
1528 {
1529 FloatParts pa = float16_unpack_canonical(a, status);
1530 FloatParts pb = float16_unpack_canonical(b, status);
1531 FloatParts pc = float16_unpack_canonical(c, status);
1532 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534 return float16_round_pack_canonical(pr, status);
1535 }
1536
1537 static float32 QEMU_SOFTFLOAT_ATTR
1538 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1539 float_status *status)
1540 {
1541 FloatParts pa = float32_unpack_canonical(a, status);
1542 FloatParts pb = float32_unpack_canonical(b, status);
1543 FloatParts pc = float32_unpack_canonical(c, status);
1544 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1545
1546 return float32_round_pack_canonical(pr, status);
1547 }
1548
1549 static float64 QEMU_SOFTFLOAT_ATTR
1550 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1551 float_status *status)
1552 {
1553 FloatParts pa = float64_unpack_canonical(a, status);
1554 FloatParts pb = float64_unpack_canonical(b, status);
1555 FloatParts pc = float64_unpack_canonical(c, status);
1556 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1557
1558 return float64_round_pack_canonical(pr, status);
1559 }
1560
/* When true, the muladd fast paths below always fall back to softfloat.
 * NOTE(review): nothing in this chunk sets it — presumably toggled by a
 * test/benchmark harness elsewhere; confirm before relying on it.
 */
static bool force_soft_fma;
1562
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* The hardfloat path requires a compatible host FP environment. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The host fma cannot halve the result; softfloat only. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Inputs must all be zero or normal after any flush-to-zero. */
    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the signed zero product explicitly, then add the addend. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals so the soft path sees unmodified operands. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo in softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1633
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    /* The hardfloat path requires a compatible host FP environment. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The host fma cannot halve the result; softfloat only. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    /* Inputs must all be zero or normal after any flush-to-zero. */
    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Build the signed zero product explicitly, then add the addend. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep originals so the soft path sees unmodified operands. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo in softfloat for exact flags.
             * NOTE(review): threshold is FLT_MIN, not DBL_MIN — this is
             * merely conservative (extra soft-path trips for doubles in
             * (DBL_MIN, FLT_MIN]), not incorrect, but confirm intent.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1704
1705 /*
1706 * Returns the result of multiplying the bfloat16 values `a'
1707 * and `b' then adding 'c', with no intermediate rounding step after the
1708 * multiplication.
1709 */
1710
1711 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1712 int flags, float_status *status)
1713 {
1714 FloatParts pa = bfloat16_unpack_canonical(a, status);
1715 FloatParts pb = bfloat16_unpack_canonical(b, status);
1716 FloatParts pc = bfloat16_unpack_canonical(c, status);
1717 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1718
1719 return bfloat16_round_pack_canonical(pr, status);
1720 }
1721
1722 /*
1723 * Returns the result of dividing the floating-point value `a' by the
1724 * corresponding value `b'. The operation is performed according to
1725 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1726 */
1727
static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
{
    /* The quotient's sign is the XOR of the operand signs in every class. */
    bool sign = a.sign ^ b.sign;

    if (a.cls == float_class_normal && b.cls == float_class_normal) {
        uint64_t n0, n1, q, r;
        int exp = a.exp - b.exp;

        /*
         * We want a 2*N / N-bit division to produce exactly an N-bit
         * result, so that we do not lose any precision and so that we
         * do not have to renormalize afterward.  If A.frac < B.frac,
         * then division would produce an (N-1)-bit result; shift A left
         * by one to produce the an N-bit result, and decrement the
         * exponent to match.
         *
         * The udiv_qrnnd algorithm that we're using requires normalization,
         * i.e. the msb of the denominator must be set.  Since we know that
         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
         * by one (more), and the remainder must be shifted right by one.
         */
        if (a.frac < b.frac) {
            exp -= 1;
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
        } else {
            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
        }
        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);

        /*
         * Set lsb if there is a remainder, to set inexact.
         * As mentioned above, to find the actual value of the remainder we
         * would need to shift right, but (1) we are only concerned about
         * non-zero-ness, and (2) the remainder will always be even because
         * both inputs to the division primitive are even.
         */
        a.frac = q | (r != 0);
        a.sign = sign;
        a.exp = exp;
        return a;
    }
    /* handle all the NaN cases */
    if (is_nan(a.cls) || is_nan(b.cls)) {
        return pick_nan(a, b, s);
    }
    /* 0/0 or Inf/Inf */
    if (a.cls == b.cls
        &&
        (a.cls == float_class_inf || a.cls == float_class_zero)) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    /* Inf / x or 0 / x */
    if (a.cls == float_class_inf || a.cls == float_class_zero) {
        a.sign = sign;
        return a;
    }
    /* Div 0 => Inf */
    if (b.cls == float_class_zero) {
        s->float_exception_flags |= float_flag_divbyzero;
        a.cls = float_class_inf;
        a.sign = sign;
        return a;
    }
    /* Div by Inf */
    if (b.cls == float_class_inf) {
        a.cls = float_class_zero;
        a.sign = sign;
        return a;
    }
    g_assert_not_reached();
}
1800
1801 float16 float16_div(float16 a, float16 b, float_status *status)
1802 {
1803 FloatParts pa = float16_unpack_canonical(a, status);
1804 FloatParts pb = float16_unpack_canonical(b, status);
1805 FloatParts pr = div_floats(pa, pb, status);
1806
1807 return float16_round_pack_canonical(pr, status);
1808 }
1809
1810 static float32 QEMU_SOFTFLOAT_ATTR
1811 soft_f32_div(float32 a, float32 b, float_status *status)
1812 {
1813 FloatParts pa = float32_unpack_canonical(a, status);
1814 FloatParts pb = float32_unpack_canonical(b, status);
1815 FloatParts pr = div_floats(pa, pb, status);
1816
1817 return float32_round_pack_canonical(pr, status);
1818 }
1819
1820 static float64 QEMU_SOFTFLOAT_ATTR
1821 soft_f64_div(float64 a, float64 b, float_status *status)
1822 {
1823 FloatParts pa = float64_unpack_canonical(a, status);
1824 FloatParts pb = float64_unpack_canonical(b, status);
1825 FloatParts pr = div_floats(pa, pb, status);
1826
1827 return float64_round_pack_canonical(pr, status);
1828 }
1829
static float hard_f32_div(float a, float b)
{
    /* Single-precision division on the host FPU. */
    const float quotient = a / b;

    return quotient;
}
1834
static double hard_f64_div(double a, double b)
{
    /* Double-precision division on the host FPU. */
    const double quotient = a / b;

    return quotient;
}
1839
1840 static bool f32_div_pre(union_float32 a, union_float32 b)
1841 {
1842 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1843 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1844 fpclassify(b.h) == FP_NORMAL;
1845 }
1846 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1847 }
1848
1849 static bool f64_div_pre(union_float64 a, union_float64 b)
1850 {
1851 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1852 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1853 fpclassify(b.h) == FP_NORMAL;
1854 }
1855 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1856 }
1857
1858 static bool f32_div_post(union_float32 a, union_float32 b)
1859 {
1860 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1861 return fpclassify(a.h) != FP_ZERO;
1862 }
1863 return !float32_is_zero(a.s);
1864 }
1865
1866 static bool f64_div_post(union_float64 a, union_float64 b)
1867 {
1868 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1869 return fpclassify(a.h) != FP_ZERO;
1870 }
1871 return !float64_is_zero(a.s);
1872 }
1873
/* Public f32 divide: host-FPU fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
1880
/* Public f64 divide: host-FPU fast path with softfloat fallback. */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
1887
1888 /*
1889 * Returns the result of dividing the bfloat16
1890 * value `a' by the corresponding value `b'.
1891 */
1892
1893 bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1894 {
1895 FloatParts pa = bfloat16_unpack_canonical(a, status);
1896 FloatParts pb = bfloat16_unpack_canonical(b, status);
1897 FloatParts pr = div_floats(pa, pb, status);
1898
1899 return bfloat16_round_pack_canonical(pr, status);
1900 }
1901
1902 /*
1903 * Float to Float conversions
1904 *
1905 * Returns the result of converting one float format to another. The
1906 * conversion is performed according to the IEC/IEEE Standard for
1907 * Binary Floating-Point Arithmetic.
1908 *
1909 * The float_to_float helper only needs to take care of raising
1910 * invalid exceptions and handling the conversion on NaNs.
1911 */
1912
static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
                                 float_status *s)
{
    if (dstf->arm_althp) {
        /* Arm alternative half-precision has neither NaN nor Inf. */
        switch (a.cls) {
        case float_class_qnan:
        case float_class_snan:
            /* There is no NaN in the destination format.  Raise Invalid
             * and return a zero with the sign of the input NaN.
             */
            s->float_exception_flags |= float_flag_invalid;
            a.cls = float_class_zero;
            a.frac = 0;
            a.exp = 0;
            break;

        case float_class_inf:
            /* There is no Inf in the destination format.  Raise Invalid
             * and return the maximum normal with the correct sign.
             */
            s->float_exception_flags |= float_flag_invalid;
            a.cls = float_class_normal;
            a.exp = dstf->exp_max;
            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
            break;

        default:
            /* Zeroes and normals convert without special handling. */
            break;
        }
    } else if (is_nan(a.cls)) {
        /* Signalling NaNs raise Invalid and are quieted before any
         * default-NaN replacement.
         */
        if (is_snan(a.cls)) {
            s->float_exception_flags |= float_flag_invalid;
            a = parts_silence_nan(a, s);
        }
        if (s->default_nan_mode) {
            return parts_default_nan(s);
        }
    }
    return a;
}
1953
1954 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1955 {
1956 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1957 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1958 FloatParts pr = float_to_float(p, &float32_params, s);
1959 return float32_round_pack_canonical(pr, s);
1960 }
1961
1962 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1963 {
1964 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1965 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1966 FloatParts pr = float_to_float(p, &float64_params, s);
1967 return float64_round_pack_canonical(pr, s);
1968 }
1969
1970 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1971 {
1972 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1973 FloatParts p = float32_unpack_canonical(a, s);
1974 FloatParts pr = float_to_float(p, fmt16, s);
1975 return float16a_round_pack_canonical(pr, s, fmt16);
1976 }
1977
1978 static float64 QEMU_SOFTFLOAT_ATTR
1979 soft_float32_to_float64(float32 a, float_status *s)
1980 {
1981 FloatParts p = float32_unpack_canonical(a, s);
1982 FloatParts pr = float_to_float(p, &float64_params, s);
1983 return float64_round_pack_canonical(pr, s);
1984 }
1985
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results. */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        /* Zero converts exactly; only the sign needs preserving. */
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        /* Subnormal, Inf or NaN: take the softfloat slow path. */
        return soft_float32_to_float64(a, s);
    }
}
2001
2002 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2003 {
2004 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2005 FloatParts p = float64_unpack_canonical(a, s);
2006 FloatParts pr = float_to_float(p, fmt16, s);
2007 return float16a_round_pack_canonical(pr, s, fmt16);
2008 }
2009
2010 float32 float64_to_float32(float64 a, float_status *s)
2011 {
2012 FloatParts p = float64_unpack_canonical(a, s);
2013 FloatParts pr = float_to_float(p, &float32_params, s);
2014 return float32_round_pack_canonical(pr, s);
2015 }
2016
2017 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2018 {
2019 FloatParts p = bfloat16_unpack_canonical(a, s);
2020 FloatParts pr = float_to_float(p, &float32_params, s);
2021 return float32_round_pack_canonical(pr, s);
2022 }
2023
2024 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2025 {
2026 FloatParts p = bfloat16_unpack_canonical(a, s);
2027 FloatParts pr = float_to_float(p, &float64_params, s);
2028 return float64_round_pack_canonical(pr, s);
2029 }
2030
2031 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2032 {
2033 FloatParts p = float32_unpack_canonical(a, s);
2034 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2035 return bfloat16_round_pack_canonical(pr, s);
2036 }
2037
2038 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2039 {
2040 FloatParts p = float64_unpack_canonical(a, s);
2041 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2042 return bfloat16_round_pack_canonical(pr, s);
2043 }
2044
2045 /*
2046 * Rounds the floating-point value `a' to an integer, and returns the
2047 * result as a floating-point value. The operation is performed
2048 * according to the IEC/IEEE Standard for Binary Floating-Point
2049 * Arithmetic.
2050 */
2051
static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        /* NaN in, NaN out (possibly silenced/defaulted by return_nan). */
        return return_nan(a, s);

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        /* Clamp scale so that `a.exp += scale' (scaling by 2**scale)
         * cannot overflow an int; the clamped range is already far
         * beyond any representable exponent. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            bool one;
            /* all fractional */
            s->float_exception_flags |= float_flag_inexact;
            /* Magnitude is below 1: the result is either 0 or 1 (with
             * the original sign); `one' says which. */
            switch (rmode) {
            case float_round_nearest_even:
                /* Round to 1 only when strictly above 1/2; exactly 1/2
                 * ties to the even neighbour, 0. */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                /* 1/2 and above rounds away from zero, to 1. */
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                /* Any non-zero fraction rounds to the odd value 1. */
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* frac_lsb is the bit of a.frac carrying integral weight 1;
             * everything below it is the fractional part. */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            /* Fraction bits plus the unit bit (for the ties-to-even
             * test), and the fraction bits alone. */
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /* Add 1/2 ulp, except on an exact tie with an even unit
                 * bit ((frac & rnd_even_mask) == frac_lsbm1), which must
                 * stay where it is. */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                /* Adding all fraction bits then truncating == ceiling. */
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                /* Round away from zero only when the unit bit is even. */
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            if (a.frac & rnd_mask) {
                s->float_exception_flags |= float_flag_inexact;
                a.frac += inc;
                a.frac &= ~rnd_mask;
                /* The increment may carry out of the top; renormalize. */
                if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                    a.frac >>= 1;
                    a.exp++;
                }
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2152
2153 float16 float16_round_to_int(float16 a, float_status *s)
2154 {
2155 FloatParts pa = float16_unpack_canonical(a, s);
2156 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2157 return float16_round_pack_canonical(pr, s);
2158 }
2159
2160 float32 float32_round_to_int(float32 a, float_status *s)
2161 {
2162 FloatParts pa = float32_unpack_canonical(a, s);
2163 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2164 return float32_round_pack_canonical(pr, s);
2165 }
2166
2167 float64 float64_round_to_int(float64 a, float_status *s)
2168 {
2169 FloatParts pa = float64_unpack_canonical(a, s);
2170 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2171 return float64_round_pack_canonical(pr, s);
2172 }
2173
2174 /*
2175 * Rounds the bfloat16 value `a' to an integer, and returns the
2176 * result as a bfloat16 value.
2177 */
2178
2179 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2180 {
2181 FloatParts pa = bfloat16_unpack_canonical(a, s);
2182 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2183 return bfloat16_round_pack_canonical(pr, s);
2184 }
2185
2186 /*
2187 * Returns the result of converting the floating-point value `a' to
2188 * the two's complement integer format. The conversion is performed
2189 * according to the IEC/IEEE Standard for Binary Floating-Point
2190 * Arithmetic---which means in particular that the conversion is
2191 * rounded according to the current rounding mode. If `a' is a NaN,
2192 * the largest positive integer is returned. Otherwise, if the
2193 * conversion overflows, the largest integer with the same sign as `a'
2194 * is returned.
2195 */
2196
static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    uint64_t r;
    int orig_flags = get_float_exception_flags(s);
    /* Round to an integral value first; this may raise inexact. */
    FloatParts p = round_to_int(in, rmode, scale, s);

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        /* NaN: drop any flags raised while rounding, raise invalid,
         * and return the largest positive integer. */
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? min : max;
    case float_class_zero:
        return 0;
    case float_class_normal:
        /* Shift the fraction so its binary point sits at bit 0. */
        if (p.exp < DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
        } else {
            /* Shifting any further would overflow 64 bits; use
             * UINT64_MAX so the range checks below saturate. */
            r = UINT64_MAX;
        }
        if (p.sign) {
            /* -(uint64_t)min is the magnitude of `min', computed in
             * unsigned arithmetic so INT64_MIN causes no overflow. */
            if (r <= -(uint64_t) min) {
                return -r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return min;
            }
        } else {
            if (r <= max) {
                return r;
            } else {
                s->float_exception_flags = orig_flags | float_flag_invalid;
                return max;
            }
        }
    default:
        g_assert_not_reached();
    }
}
2242
/*
 * Width-specific wrappers: convert to a signed integer with an explicit
 * rounding mode and scale (the value converted is a * 2**scale),
 * saturating at the destination type's limits via round_to_int_and_pack().
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    return round_to_int_and_pack(float16_unpack_canonical(a, s),
                                 rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float16_unpack_canonical(a, s),
                                 rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float16_unpack_canonical(a, s),
                                 rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float16_unpack_canonical(a, s),
                                 rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float32_unpack_canonical(a, s),
                                 rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float32_unpack_canonical(a, s),
                                 rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float32_unpack_canonical(a, s),
                                 rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float64_unpack_canonical(a, s),
                                 rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float64_unpack_canonical(a, s),
                                 rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_int_and_pack(float64_unpack_canonical(a, s),
                                 rmode, scale, INT64_MIN, INT64_MAX, s);
}
2312
/*
 * Convert to a signed integer using the rounding mode held in the
 * float_status (no scaling).
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
2362
/*
 * Convert to a signed integer, always truncating toward zero regardless
 * of the rounding mode in the float_status.
 */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2407
2408 /*
2409 * Returns the result of converting the floating-point value `a' to
2410 * the two's complement integer format.
2411 */
2412
/*
 * bfloat16 -> signed integer wrappers: explicit rounding mode + scale,
 * current rounding mode, and truncate-toward-zero variants, all built on
 * round_to_int_and_pack() with the destination type's limits.
 */
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
                                 rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
                                 rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
                                 rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2463
2464 /*
2465 * Returns the result of converting the floating-point value `a' to
2466 * the unsigned integer format. The conversion is performed according
2467 * to the IEC/IEEE Standard for Binary Floating-Point
2468 * Arithmetic---which means in particular that the conversion is
2469 * rounded according to the current rounding mode. If `a' is a NaN,
2470 * the largest unsigned integer is returned. Otherwise, if the
2471 * conversion overflows, the largest unsigned integer is returned. If
2472 * the 'a' is negative, the result is rounded and zero is returned;
2473 * values that do not round to zero will raise the inexact exception
2474 * flag.
2475 */
2476
static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s)
{
    int orig_flags = get_float_exception_flags(s);
    /* Round to an integral value first; this may raise inexact. */
    FloatParts p = round_to_int(in, rmode, scale, s);
    uint64_t r;

    switch (p.cls) {
    case float_class_snan:
    case float_class_qnan:
        /* NaN: drop any flags raised while rounding, raise invalid,
         * and return the largest unsigned integer. */
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return max;
    case float_class_inf:
        s->float_exception_flags = orig_flags | float_flag_invalid;
        return p.sign ? 0 : max;
    case float_class_zero:
        /* Includes negative values that rounded to zero; the inexact
         * flag raised by round_to_int() for those is kept. */
        return 0;
    case float_class_normal:
        if (p.sign) {
            /* Negative non-zero result: out of range for unsigned. */
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return 0;
        }

        /* Shift the fraction so its binary point sits at bit 0. */
        if (p.exp < DECOMPOSED_BINARY_POINT) {
            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
        } else {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }

        /* For uint64 this will never trip, but if p.exp is too large
         * to shift a decomposed fraction we shall have exited via the
         * 3rd leg above.
         */
        if (r > max) {
            s->float_exception_flags = orig_flags | float_flag_invalid;
            return max;
        }
        return r;
    default:
        g_assert_not_reached();
    }
}
2523
/*
 * Width-specific wrappers: convert to an unsigned integer with an
 * explicit rounding mode and scale (the value converted is a * 2**scale),
 * saturating at the destination type's maximum via round_to_uint_and_pack().
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
                                  rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
                                  rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
                                  rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
                                  rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
                                  rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
                                  rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
                                  rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
                                  rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
                                  rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
                                  rmode, scale, UINT64_MAX, s);
}
2593
/*
 * Convert to an unsigned integer using the rounding mode held in the
 * float_status (no scaling).
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
2643
/*
 * Convert to an unsigned integer, always truncating toward zero
 * regardless of the rounding mode in the float_status.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2688
2689 /*
2690 * Returns the result of converting the bfloat16 value `a' to
2691 * the unsigned integer format.
2692 */
2693
/*
 * bfloat16 -> unsigned integer wrappers: explicit rounding mode + scale,
 * current rounding mode, and truncate-toward-zero variants, all built on
 * round_to_uint_and_pack() with the destination type's maximum.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
                                  rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
                                  rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
                                  rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2744
2745 /*
2746 * Integer to float conversions
2747 *
2748 * Returns the result of converting the two's complement integer `a'
2749 * to the floating-point format. The conversion is performed according
2750 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2751 */
2752
static FloatParts int_to_float(int64_t a, int scale, float_status *status)
{
    /* `status' is unused here; rounding and flag setting happen in the
     * callers' *_round_pack_canonical() step. */
    FloatParts r = { .sign = false };

    if (a == 0) {
        r.cls = float_class_zero;
    } else {
        uint64_t f = a;
        int shift;

        r.cls = float_class_normal;
        if (a < 0) {
            /* Negate in unsigned arithmetic so INT64_MIN does not
             * cause signed overflow. */
            f = -f;
            r.sign = true;
        }
        /* Normalize: place the most significant bit just below the
         * decomposed binary point.  shift is -1 only when the
         * magnitude is 2**63 (a == INT64_MIN). */
        shift = clz64(f) - 1;
        /* Clamp scale so the exponent arithmetic cannot overflow. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);

        r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
        /* For shift == -1 the value is exactly 2**63: the fraction is
         * the bare implicit bit with the exponent one higher. */
        r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
    }

    return r;
}
2777
/*
 * Signed integer -> float16.  The narrower variants sign-extend to
 * int64_t and share the int64 implementation; rounding to half
 * precision happens in the pack step.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts pa = int_to_float(a, scale, status);
    return float16_round_pack_canonical(pa, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2813
/*
 * Signed integer -> float32.  Narrower variants sign-extend to int64_t
 * and share the int64 implementation.
 */
float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts pa = int_to_float(a, scale, status);
    return float32_round_pack_canonical(pa, status);
}

float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
2844
/*
 * Signed integer -> float64.  Narrower variants sign-extend to int64_t
 * and share the int64 implementation.
 */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts pa = int_to_float(a, scale, status);
    return float64_round_pack_canonical(pa, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
2875
2876 /*
2877 * Returns the result of converting the two's complement integer `a'
2878 * to the bfloat16 format.
2879 */
2880
/*
 * Signed integer -> bfloat16.  Narrower variants sign-extend to int64_t
 * and share the int64 implementation.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts pa = int_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(pa, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
2911
2912 /*
2913 * Unsigned Integer to float conversions
2914 *
2915 * Returns the result of converting the unsigned integer `a' to the
2916 * floating-point format. The conversion is performed according to the
2917 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2918 */
2919
static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
{
    /* `status' is unused here; rounding and flag setting happen in the
     * callers' *_round_pack_canonical() step. */
    FloatParts r = { .sign = false };

    if (a == 0) {
        r.cls = float_class_zero;
    } else {
        /* Clamp scale so the exponent arithmetic cannot overflow. */
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        r.cls = float_class_normal;
        if ((int64_t)a < 0) {
            /* Top bit set: the value cannot be shifted left into the
             * implicit-bit position, so shift right by one instead,
             * jamming the bit shifted out so it is not lost. */
            r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
            shift64RightJamming(a, 1, &a);
            r.frac = a;
        } else {
            /* Normalize: place the most significant bit just below
             * the decomposed binary point. */
            int shift = clz64(a) - 1;
            r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
            r.frac = a << shift;
        }
    }

    return r;
}
2942
/*
 * Unsigned integer -> float16.  Narrower variants zero-extend to
 * uint64_t and share the uint64 implementation.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts pa = uint_to_float(a, scale, status);
    return float16_round_pack_canonical(pa, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
2978
/*
 * Unsigned integer -> float32.  Narrower variants zero-extend to
 * uint64_t and share the uint64 implementation.
 */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts pa = uint_to_float(a, scale, status);
    return float32_round_pack_canonical(pa, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3009
/*
 * Unsigned integer -> float64.  Narrower variants zero-extend to
 * uint64_t and share the uint64 implementation.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts pa = uint_to_float(a, scale, status);
    return float64_round_pack_canonical(pa, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3040
3041 /*
3042 * Returns the result of converting the unsigned integer `a' to the
3043 * bfloat16 format.
3044 */
3045
/*
 * Unsigned integer -> bfloat16.  Narrower variants zero-extend to
 * uint64_t and share the uint64 implementation.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts pa = uint_to_float(a, scale, status);
    return bfloat16_round_pack_canonical(pa, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3076
3077 /* Float Min/Max */
3078 /* min() and max() functions. These can't be implemented as
3079 * 'compare and pick one input' because that would mishandle
3080 * NaNs and +0 vs -0.
3081 *
3082 * minnum() and maxnum() functions. These are similar to the min()
3083 * and max() functions but if one of the arguments is a QNaN and
3084 * the other is numerical then the numerical argument is returned.
3085 * SNaNs will get quietened before being returned.
3086 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3087 * and maxNum() operations. min() and max() are the typical min/max
3088 * semantics provided by many CPUs which predate that specification.
3089 *
3090 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
3092 */
static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
                                bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return pick_nan(a, b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                /* minnum/maxnum semantics: a quiet NaN loses to a
                 * numerical operand. */
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
        }
        /* Both operands NaN, or non-ieee semantics: propagate a NaN. */
        return pick_nan(a, b, s);
    } else {
        int a_exp, b_exp;

        /* Map each class onto a comparable exponent: infinities order
         * above every finite value, zeros below. */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /* Magnitude variants: when |a| != |b|, pick purely by
         * magnitude and ignore the signs; equal magnitudes fall
         * through to the sign-aware comparison below. */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            /* Same sign: compare magnitudes; for negative operands the
             * larger magnitude is the smaller value, hence the extra
             * a.sign term in the selector. */
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Different signs: the negative operand is the smaller one.
             * This also orders -0 below +0. */
            return a.sign ^ ismin ? b : a;
        }
    }
}
3163
/*
 * Instantiate a min/max operation for the given float width.  The
 * parameters mirror minmax_floats(): `ismin' selects min vs max,
 * `isieee' selects IEEE 754-2008 minNum/maxNum NaN handling, and
 * `ismag' compares magnitudes.  (Parameter renamed from the
 * misspelled `isiee'; invocations are unaffected.)
 */
#define MINMAX(sz, name, ismin, isieee, ismag)                          \
float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
                                     float_status *s)                   \
{                                                                       \
    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
    FloatParts pr = minmax_floats(pa, pb, ismin, isieee, ismag, s);     \
                                                                        \
    return float ## sz ## _round_pack_canonical(pr, s);                 \
}
3174
/* Instantiate the full set of operations for each supported width. */
MINMAX(16, min, true, false, false)
MINMAX(16, minnum, true, true, false)
MINMAX(16, minnummag, true, true, true)
MINMAX(16, max, false, false, false)
MINMAX(16, maxnum, false, true, false)
MINMAX(16, maxnummag, false, true, true)

MINMAX(32, min, true, false, false)
MINMAX(32, minnum, true, true, false)
MINMAX(32, minnummag, true, true, true)
MINMAX(32, max, false, false, false)
MINMAX(32, maxnum, false, true, false)
MINMAX(32, maxnummag, false, true, true)

MINMAX(64, min, true, false, false)
MINMAX(64, minnum, true, true, false)
MINMAX(64, minnummag, true, true, true)
MINMAX(64, max, false, false, false)
MINMAX(64, maxnum, false, true, false)
MINMAX(64, maxnummag, false, true, true)

#undef MINMAX
3197
/*
 * As MINMAX above, but for the bfloat16 format.  (Parameter renamed
 * from the misspelled `isiee'; invocations are unaffected.)
 */
#define BF16_MINMAX(name, ismin, isieee, ismag)                         \
bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
{                                                                       \
    FloatParts pa = bfloat16_unpack_canonical(a, s);                    \
    FloatParts pb = bfloat16_unpack_canonical(b, s);                    \
    FloatParts pr = minmax_floats(pa, pb, ismin, isieee, ismag, s);     \
                                                                        \
    return bfloat16_round_pack_canonical(pr, s);                        \
}
3207
/* Instantiate the full bfloat16 min/max family. */
BF16_MINMAX(min, true, false, false)
BF16_MINMAX(minnum, true, true, false)
BF16_MINMAX(minnummag, true, true, true)
BF16_MINMAX(max, false, false, false)
BF16_MINMAX(maxnum, false, true, false)
BF16_MINMAX(maxnummag, false, true, true)

#undef BF16_MINMAX
3216
3217 /* Floating point compare */
/*
 * Compare canonicalized values `a' and `b'.  With is_quiet set, only
 * signaling NaNs raise the invalid exception; otherwise any NaN does.
 */
static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            s->float_exception_flags |= float_flag_invalid;
        }
        return float_relation_unordered;
    }

    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            /* +0 and -0 compare equal regardless of sign. */
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    /* Both normal and non-zero: differing signs decide immediately. */
    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* Same sign: compare exponents, then fractions; a negative sign
     * inverts the magnitude ordering.
     */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
3274
/*
 * Instantiate a soft compare helper for the given width.  The result
 * type is FloatRelation, matching compare_floats(), rather than a
 * bare int, so the enum type is preserved all the way to the callers.
 */
#define COMPARE(name, attr, sz)                                         \
static FloatRelation attr                                               \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
    return compare_floats(pa, pb, is_quiet, s);                         \
}
3283
/* f16 is flattened (no hardfloat path); f32/f64 use the softfloat attr. */
COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE
3289
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    /* Signaling compare: any NaN operand raises the invalid exception. */
    return soft_f16_compare(a, b, false, s);
}
3294
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    /* Quiet compare: only signaling NaNs raise the invalid exception. */
    return soft_f16_compare(a, b, true, s);
}
3299
/*
 * Hardfloat-accelerated float32 compare.  The ISO C comparison macros
 * (isgreater/isless/...) return false, without trapping, on unordered
 * operands, so NaNs fall through to the soft path, which sets the
 * exception flags correctly.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush input denormals (if enabled) before using the host FPU. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
3328
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    /* Signaling compare: any NaN operand raises the invalid exception. */
    return f32_compare(a, b, false, s);
}
3333
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    /* Quiet compare: only signaling NaNs raise the invalid exception. */
    return f32_compare(a, b, true, s);
}
3338
/*
 * Hardfloat-accelerated float64 compare; mirrors f32_compare().  The
 * ISO C comparison macros return false, without trapping, on unordered
 * operands, so NaNs fall through to the soft path for flag handling.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Flush input denormals (if enabled) before using the host FPU. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
3367
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    /* Signaling compare: any NaN operand raises the invalid exception. */
    return f64_compare(a, b, false, s);
}
3372
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    /* Quiet compare: only signaling NaNs raise the invalid exception. */
    return f64_compare(a, b, true, s);
}
3377
/* bfloat16 compare; no hardfloat fast path, always via FloatParts. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts pa = bfloat16_unpack_canonical(a, s);
    FloatParts pb = bfloat16_unpack_canonical(b, s);
    return compare_floats(pa, pb, is_quiet, s);
}
3385
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    /* Signaling compare: any NaN operand raises the invalid exception. */
    return soft_bf16_compare(a, b, false, s);
}
3390
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    /* Quiet compare: only signaling NaNs raise the invalid exception. */
    return soft_bf16_compare(a, b, true, s);
}
3395
3396 /* Multiply A by 2 raised to the power N. */
static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
{
    if (unlikely(is_nan(a.cls))) {
        return return_nan(a, s);
    }
    if (a.cls == float_class_normal) {
        /* The largest float type (even though not supported by FloatParts)
         * is float128, which has a 15 bit exponent. Bounding N to 16 bits
         * still allows rounding to infinity, without allowing overflow
         * within the int32_t that backs FloatParts.exp.
         */
        n = MIN(MAX(n, -0x10000), 0x10000);
        a.exp += n;
    }
    /* Zeroes and infinities are returned unchanged. */
    return a;
}
3413
3414 float16 float16_scalbn(float16 a, int n, float_status *status)
3415 {
3416 FloatParts pa = float16_unpack_canonical(a, status);
3417 FloatParts pr = scalbn_decomposed(pa, n, status);
3418 return float16_round_pack_canonical(pr, status);
3419 }
3420
3421 float32 float32_scalbn(float32 a, int n, float_status *status)
3422 {
3423 FloatParts pa = float32_unpack_canonical(a, status);
3424 FloatParts pr = scalbn_decomposed(pa, n, status);
3425 return float32_round_pack_canonical(pr, status);
3426 }
3427
3428 float64 float64_scalbn(float64 a, int n, float_status *status)
3429 {
3430 FloatParts pa = float64_unpack_canonical(a, status);
3431 FloatParts pr = scalbn_decomposed(pa, n, status);
3432 return float64_round_pack_canonical(pr, status);
3433 }
3434
3435 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3436 {
3437 FloatParts pa = bfloat16_unpack_canonical(a, status);
3438 FloatParts pr = scalbn_decomposed(pa, n, status);
3439 return bfloat16_round_pack_canonical(pr, status);
3440 }
3441
3442 /*
3443 * Square Root
3444 *
3445 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However for simplicity we just compute the
3447 * square root by iterating down from the implicit bit to enough extra
3448 * bits to ensure we get a correctly rounded result.
3449 *
3450 * This does mean however the calculation is slower than before,
3451 * especially for 64 bit floats.
3452 */
3453
/*
 * Compute sqrt(a) for a canonicalized value, bit by bit, producing
 * enough extra result bits (chosen via p->frac_shift) that the later
 * rounding step is correct for the target format.
 */
static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        return return_nan(a, s);
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    /* Any negative non-zero input (including -inf) is invalid; this
     * check deliberately precedes the infinity check below.
     */
    if (a.sign) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top.  Adding room for that is a
     * right shift.  If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift.  Combine
     * those and we shift right if the exponent is even.
     */
    a_frac = a.frac;
    if (!(a.exp & 1)) {
        a_frac >>= 1;
    }
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted one more bit
     * at the top, so these positions are one less.
     */
    bit = DECOMPOSED_BINARY_POINT - 1;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 1) + (a_frac != 0);

    return a;
}
3514
3515 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3516 {
3517 FloatParts pa = float16_unpack_canonical(a, status);
3518 FloatParts pr = sqrt_float(pa, status, &float16_params);
3519 return float16_round_pack_canonical(pr, status);
3520 }
3521
3522 static float32 QEMU_SOFTFLOAT_ATTR
3523 soft_f32_sqrt(float32 a, float_status *status)
3524 {
3525 FloatParts pa = float32_unpack_canonical(a, status);
3526 FloatParts pr = sqrt_float(pa, status, &float32_params);
3527 return float32_round_pack_canonical(pr, status);
3528 }
3529
3530 static float64 QEMU_SOFTFLOAT_ATTR
3531 soft_f64_sqrt(float64 a, float_status *status)
3532 {
3533 FloatParts pa = float64_unpack_canonical(a, status);
3534 FloatParts pr = sqrt_float(pa, status, &float64_params);
3535 return float64_round_pack_canonical(pr, status);
3536 }
3537
/*
 * Hardfloat-accelerated float32 sqrt.  The host sqrtf() is only used
 * when the input is a non-negative zero or normal number, so the
 * result and exception behaviour match softfloat; everything else
 * (NaNs, denormals, negatives, infinity) goes to the soft path.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush input denormals (if enabled) before classifying. */
    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3564
/*
 * Hardfloat-accelerated float64 sqrt; mirrors float32_sqrt().  The
 * host sqrt() is only used for non-negative zero or normal inputs;
 * everything else goes to the soft path.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    /* Flush input denormals (if enabled) before classifying. */
    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3591
3592 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3593 {
3594 FloatParts pa = bfloat16_unpack_canonical(a, status);
3595 FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
3596 return bfloat16_round_pack_canonical(pr, status);
3597 }
3598
3599 /*----------------------------------------------------------------------------
3600 | The pattern for a default generated NaN.
3601 *----------------------------------------------------------------------------*/
3602
float16 float16_default_nan(float_status *status)
{
    /* Shift the canonical NaN fraction down into float16's frac field. */
    FloatParts p = parts_default_nan(status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(p);
}
3609
float32 float32_default_nan(float_status *status)
{
    /* Shift the canonical NaN fraction down into float32's frac field. */
    FloatParts p = parts_default_nan(status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(p);
}
3616
float64 float64_default_nan(float_status *status)
{
    /* Shift the canonical NaN fraction down into float64's frac field. */
    FloatParts p = parts_default_nan(status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(p);
}
3623
/* float128 is not backed by FloatParts, so build the raw words directly. */
float128 float128_default_nan(float_status *status)
{
    FloatParts p = parts_default_nan(status);
    float128 r;

    /* Extrapolate from the choices made by parts_default_nan to fill
     * in the quad-floating format. If the low bit is set, assume we
     * want to set all non-snan bits.
     */
    r.low = -(p.frac & 1);
    r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
    r.high |= UINT64_C(0x7FFF000000000000);
    r.high |= (uint64_t)p.sign << 63;

    return r;
}
3640
bfloat16 bfloat16_default_nan(float_status *status)
{
    /* Shift the canonical NaN fraction down into bfloat16's frac field. */
    FloatParts p = parts_default_nan(status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(p);
}
3647
3648 /*----------------------------------------------------------------------------
3649 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3650 *----------------------------------------------------------------------------*/
3651
float16 float16_silence_nan(float16 a, float_status *status)
{
    /* Shift frac up to the canonical position, quiet it, shift back. */
    FloatParts p = float16_unpack_raw(a);
    p.frac <<= float16_params.frac_shift;
    p = parts_silence_nan(p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(p);
}
3660
float32 float32_silence_nan(float32 a, float_status *status)
{
    /* Shift frac up to the canonical position, quiet it, shift back. */
    FloatParts p = float32_unpack_raw(a);
    p.frac <<= float32_params.frac_shift;
    p = parts_silence_nan(p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(p);
}
3669
float64 float64_silence_nan(float64 a, float_status *status)
{
    /* Shift frac up to the canonical position, quiet it, shift back. */
    FloatParts p = float64_unpack_raw(a);
    p.frac <<= float64_params.frac_shift;
    p = parts_silence_nan(p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(p);
}
3678
bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    /* Shift frac up to the canonical position, quiet it, shift back. */
    FloatParts p = bfloat16_unpack_raw(a);
    p.frac <<= bfloat16_params.frac_shift;
    p = parts_silence_nan(p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(p);
}
3687
3688 /*----------------------------------------------------------------------------
3689 | If `a' is denormal and we are in flush-to-zero mode then set the
3690 | input-denormal exception and return zero. Otherwise just return the value.
3691 *----------------------------------------------------------------------------*/
3692
3693 static bool parts_squash_denormal(FloatParts p, float_status *status)
3694 {
3695 if (p.exp == 0 && p.frac != 0) {
3696 float_raise(float_flag_input_denormal, status);
3697 return true;
3698 }
3699
3700 return false;
3701 }
3702
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    /* Only active when flush_inputs_to_zero is set in the status. */
    if (status->flush_inputs_to_zero) {
        FloatParts p = float16_unpack_raw(a);
        if (parts_squash_denormal(p, status)) {
            /* Replace the denormal with a zero of the same sign. */
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}
3713
float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    /* Only active when flush_inputs_to_zero is set in the status. */
    if (status->flush_inputs_to_zero) {
        FloatParts p = float32_unpack_raw(a);
        if (parts_squash_denormal(p, status)) {
            /* Replace the denormal with a zero of the same sign. */
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}
3724
float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    /* Only active when flush_inputs_to_zero is set in the status. */
    if (status->flush_inputs_to_zero) {
        FloatParts p = float64_unpack_raw(a);
        if (parts_squash_denormal(p, status)) {
            /* Replace the denormal with a zero of the same sign. */
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}
3735
bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    /* Only active when flush_inputs_to_zero is set in the status. */
    if (status->flush_inputs_to_zero) {
        FloatParts p = bfloat16_unpack_raw(a);
        if (parts_squash_denormal(p, status)) {
            /* Replace the denormal with a zero of the same sign. */
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
3746
3747 /*----------------------------------------------------------------------------
3748 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3749 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3750 | input. If `zSign' is 1, the input is negated before being converted to an
3751 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3752 | is simply rounded to an integer, with the inexact exception raised if the
3753 | input cannot be represented exactly as an integer. However, if the fixed-
3754 | point input is too large, the invalid exception is raised and the largest
3755 | positive or negative integer is returned.
3756 *----------------------------------------------------------------------------*/
3757
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment applied below the binary point (bit 6). */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away from zero only when the integer bit is even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Ties-to-even: on an exact half-way value, clear the low bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range (high bits set, or sign flipped on negate). */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        status->float_exception_flags |= float_flag_inexact;
    }
    return z;

}
3805
3806 /*----------------------------------------------------------------------------
3807 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3808 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3809 | and returns the properly rounded 64-bit integer corresponding to the input.
3810 | If `zSign' is 1, the input is negated before being converted to an integer.
3811 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3812 | the inexact exception raised if the input cannot be represented exactly as
3813 | an integer. However, if the fixed-point input is too large, the invalid
3814 | exception is raised and the largest positive or negative integer is
3815 | returned.
3816 *----------------------------------------------------------------------------*/
3817
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide whether the fraction word absZ1 rounds absZ0 up by one. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the fraction is >= 1/2 (top bit of absZ1 set). */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        if ( absZ0 == 0 ) goto overflow;
        /* Ties-to-even: on an exact half-way value, clear the low bit. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Out of int64 range: the sign of z disagrees with zSign. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        status->float_exception_flags |= float_flag_inexact;
    }
    return z;

}
3867
3868 /*----------------------------------------------------------------------------
3869 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3870 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3871 | and returns the properly rounded 64-bit unsigned integer corresponding to the
3872 | input. Ordinarily, the fixed-point input is simply rounded to an integer,
3873 | with the inexact exception raised if the input cannot be represented exactly
3874 | as an integer. However, if the fixed-point input is too large, the invalid
3875 | exception is raised and the largest unsigned integer is returned.
3876 *----------------------------------------------------------------------------*/
3877
/* NOTE(review): the return type is int64_t but the value carried is the
 * unsigned result's bit pattern (see UINT64_MAX below); callers appear
 * expected to convert back to uint64_t -- confirm against call sites.
 */
static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
                               uint64_t absZ1, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    /* Decide whether the fraction word absZ1 rounds absZ0 up by one. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the fraction is >= 1/2 (top bit of absZ1 set). */
        increment = ((int64_t)absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if (increment) {
        ++absZ0;
        if (absZ0 == 0) {
            /* Carry out of 64 bits: result too large to represent. */
            float_raise(float_flag_invalid, status);
            return UINT64_MAX;
        }
        /* Ties-to-even: on an exact half-way value, clear the low bit. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }

    /* A non-zero negative value cannot be represented as unsigned. */
    if (zSign && absZ0) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    if (absZ1) {
        status->float_exception_flags |= float_flag_inexact;
    }
    return absZ0;
}
3927
3928 /*----------------------------------------------------------------------------
3929 | Normalizes the subnormal single-precision floating-point value represented
3930 | by the denormalized significand `aSig'. The normalized exponent and
3931 | significand are stored at the locations pointed to by `zExpPtr' and
3932 | `zSigPtr', respectively.
3933 *----------------------------------------------------------------------------*/
3934
3935 static void
3936 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
3937 {
3938 int8_t shiftCount;
3939
3940 shiftCount = clz32(aSig) - 8;
3941 *zSigPtr = aSig<<shiftCount;
3942 *zExpPtr = 1 - shiftCount;
3943
3944 }
3945
3946 /*----------------------------------------------------------------------------
3947 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3948 | and significand `zSig', and returns the proper single-precision floating-
3949 | point value corresponding to the abstract input. Ordinarily, the abstract
3950 | value is simply rounded and packed into the single-precision format, with
3951 | the inexact exception raised if the abstract input cannot be represented
3952 | exactly. However, if the abstract value is too large, the overflow and
3953 | inexact exceptions are raised and an infinity or maximal finite value is
3954 | returned. If the abstract value is too small, the input value is rounded to
3955 | a subnormal number, and the underflow and inexact exceptions are raised if
3956 | the abstract input cannot be represented exactly as a subnormal single-
3957 | precision floating-point number.
3958 | The input significand `zSig' has its binary point between bits 30
3959 | and 29, which is 7 bits to the left of the usual location. This shifted
3960 | significand must be normalized or smaller. If `zSig' is not normalized,
3961 | `zExp' must be 0; in that case, the result returned is a subnormal number,
3962 | and it must not require rounding. In the usual case that `zSig' is
3963 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3964 | The handling of underflow and overflow follows the IEC/IEEE Standard for
3965 | Binary Floating-Point Arithmetic.
3966 *----------------------------------------------------------------------------*/
3967
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment applied to the 7 guard bits below bit 7. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away from zero only when the result bit is even. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* The unsigned compare also catches negative zExp (underflow). */
    if ( 0xFD <= (uint16_t) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: round-to-odd and the truncating modes produce
             * the maximum finite value instead of infinity.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Detect tininess per the configured before/after-rounding
             * rule, then denormalize by shifting right with jamming.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        status->float_exception_flags |= float_flag_inexact;
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Ties-to-even: on an exact half-way value, clear the low bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4044
4045 /*----------------------------------------------------------------------------
4046 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4047 | and significand `zSig', and returns the proper single-precision floating-
4048 | point value corresponding to the abstract input. This routine is just like
4049 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4050 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4051 | floating-point exponent.
4052 *----------------------------------------------------------------------------*/
4053
static float32
normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                             float_status *status)
{
    int8_t shiftCount;

    /* Normalize so the top set bit lands in bit 30, adjusting zExp. */
    shiftCount = clz32(zSig) - 1;
    return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
                               status);

}
4065
4066 /*----------------------------------------------------------------------------
4067 | Normalizes the subnormal double-precision floating-point value represented
4068 | by the denormalized significand `aSig'. The normalized exponent and
4069 | significand are stored at the locations pointed to by `zExpPtr' and
4070 | `zSigPtr', respectively.
4071 *----------------------------------------------------------------------------*/
4072
4073 static void
4074 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4075 {
4076 int8_t shiftCount;
4077
4078 shiftCount = clz64(aSig) - 11;
4079 *zSigPtr = aSig<<shiftCount;
4080 *zExpPtr = 1 - shiftCount;
4081
4082 }
4083
4084 /*----------------------------------------------------------------------------
4085 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4086 | double-precision floating-point value, returning the result. After being
4087 | shifted into the proper positions, the three fields are simply added
4088 | together to form the result. This means that any integer portion of `zSig'
4089 | will be added into the exponent. Since a properly normalized significand
4090 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4091 | than the desired result exponent whenever `zSig' is a complete, normalized
4092 | significand.
4093 *----------------------------------------------------------------------------*/
4094
4095 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4096 {
4097
4098 return make_float64(
4099 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4100
4101 }
4102
4103 /*----------------------------------------------------------------------------
4104 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105 | and significand `zSig', and returns the proper double-precision floating-
4106 | point value corresponding to the abstract input. Ordinarily, the abstract
4107 | value is simply rounded and packed into the double-precision format, with
4108 | the inexact exception raised if the abstract input cannot be represented
4109 | exactly. However, if the abstract value is too large, the overflow and
4110 | inexact exceptions are raised and an infinity or maximal finite value is
4111 | returned. If the abstract value is too small, the input value is rounded to
4112 | a subnormal number, and the underflow and inexact exceptions are raised if
4113 | the abstract input cannot be represented exactly as a subnormal double-
4114 | precision floating-point number.
4115 | The input significand `zSig' has its binary point between bits 62
4116 | and 61, which is 10 bits to the left of the usual location. This shifted
4117 | significand must be normalized or smaller. If `zSig' is not normalized,
4118 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4119 | and it must not require rounding. In the usual case that `zSig' is
4120 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4122 | Binary Floating-Point Arithmetic.
4123 *----------------------------------------------------------------------------*/
4124
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Pick the value added to zSig before truncating the 10 round bits
     * (zSig's binary point is 10 bits left of its final position): 0x200
     * is half an ulp for round-to-nearest, 0x3ff rounds away from zero
     * when the rounding direction matches the sign, 0 truncates.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Only round up when the result's low bit would otherwise be 0. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /*
     * Unsigned compare catches both ends at once: zExp >= 0x7FD (possible
     * overflow) and zExp < 0 (subnormal result) as one range test.
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: produce infinity if rounding moves the magnitude
             * up; otherwise (to-zero, to-odd, or rounding toward the
             * opposite sign) produce the largest finite value, obtained
             * here by packing an all-ones significand via -1.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess: detected before rounding by mode, by being more
             * than one step below the subnormal boundary, or because even
             * after the increment the value stays below 2^63 (i.e. the
             * rounded result is still subnormal).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Denormalize, jamming shifted-out bits into bit 0. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        status->float_exception_flags |= float_flag_inexact;
    }
    /* Apply the increment and discard the 10 round bits. */
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact halfway case under nearest-even: force the result even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A significand that rounded to zero must carry a zero exponent. */
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4200
4201 /*----------------------------------------------------------------------------
4202 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4203 | and significand `zSig', and returns the proper double-precision floating-
4204 | point value corresponding to the abstract input. This routine is just like
4205 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4206 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4207 | floating-point exponent.
4208 *----------------------------------------------------------------------------*/
4209
4210 static float64
4211 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4212 float_status *status)
4213 {
4214 int8_t shiftCount;
4215
4216 shiftCount = clz64(zSig) - 1;
4217 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4218 status);
4219
4220 }
4221
4222 /*----------------------------------------------------------------------------
4223 | Normalizes the subnormal extended double-precision floating-point value
4224 | represented by the denormalized significand `aSig'. The normalized exponent
4225 | and significand are stored at the locations pointed to by `zExpPtr' and
4226 | `zSigPtr', respectively.
4227 *----------------------------------------------------------------------------*/
4228
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * The extended format has an explicit integer bit, so the significand
     * is normalized by shifting its leading 1 all the way to bit 63; the
     * exponent compensates for the shift.
     */
    int8_t lz = clz64(aSig);

    *zSigPtr = aSig << lz;
    *zExpPtr = 1 - lz;
}
4238
4239 /*----------------------------------------------------------------------------
4240 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4241 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4242 | and returns the proper extended double-precision floating-point value
4243 | corresponding to the abstract input. Ordinarily, the abstract value is
4244 | rounded and packed into the extended double-precision format, with the
4245 | inexact exception raised if the abstract input cannot be represented
4246 | exactly. However, if the abstract value is too large, the overflow and
4247 | inexact exceptions are raised and an infinity or maximal finite value is
4248 | returned. If the abstract value is too small, the input value is rounded to
4249 | a subnormal number, and the underflow and inexact exceptions are raised if
4250 | the abstract input cannot be represented exactly as a subnormal extended
4251 | double-precision floating-point number.
4252 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
4253 | number of bits as single or double precision, respectively. Otherwise, the
4254 | result is rounded to the full precision of the extended double-precision
4255 | format.
4256 | The input significand must be normalized or smaller. If the input
4257 | significand is not normalized, `zExp' must be 0; in that case, the result
4258 | returned is a subnormal number, and it must not require rounding. The
4259 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4260 | Floating-Point Arithmetic.
4261 *----------------------------------------------------------------------------*/
4262
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /*
     * Reduced-precision modes (x87 precision control): round within the
     * 64-bit zSig0 at single or double precision.  roundIncrement is half
     * an ulp at the target precision; roundMask covers all bits below it.
     */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold the low word into zSig0's sticky bit. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* One unsigned test for both exponent extremes (overflow/subnormal). */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tiny if detected before rounding, if more than one step
             * below the subnormal range, or if the increment does not
             * carry out of zSig0 (result stays subnormal after rounding).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                status->float_exception_flags |= float_flag_inexact;
            }
            zSig0 += roundIncrement;
            /* A carry into the integer bit makes the result normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Nearest-even tie: also clear the result's low kept bit. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        status->float_exception_flags |= float_flag_inexact;
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize to 2^63 and bump the exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 80-bit precision: zSig1 holds the round/sticky bits and the
     * decision reduces to whether to add one ulp to zSig0.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Rounding toward zero (or toward the opposite sign) yields
             * the largest finite value instead of infinity.
             */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                status->float_exception_flags |= float_flag_inexact;
            }
            /* The shift changed zSig1, so recompute the increment decision. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Nearest-even tie (zSig1 == 0x80...0): force even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into the integer bit: result became normal. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        status->float_exception_flags |= float_flag_inexact;
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Significand wrapped: renormalize to 2^63, bump exponent. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Nearest-even tie: clear the low bit to make the result even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4450
4451 /*----------------------------------------------------------------------------
4452 | Takes an abstract floating-point value having sign `zSign', exponent
4453 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4454 | and returns the proper extended double-precision floating-point value
4455 | corresponding to the abstract input. This routine is just like
4456 | `roundAndPackFloatx80' except that the input significand does not have to be
4457 | normalized.
4458 *----------------------------------------------------------------------------*/
4459
4460 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4461 bool zSign, int32_t zExp,
4462 uint64_t zSig0, uint64_t zSig1,
4463 float_status *status)
4464 {
4465 int8_t shiftCount;
4466
4467 if ( zSig0 == 0 ) {
4468 zSig0 = zSig1;
4469 zSig1 = 0;
4470 zExp -= 64;
4471 }
4472 shiftCount = clz64(zSig0);
4473 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4474 zExp -= shiftCount;
4475 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4476 zSig0, zSig1, status);
4477
4478 }
4479
4480 /*----------------------------------------------------------------------------
4481 | Returns the least-significant 64 fraction bits of the quadruple-precision
4482 | floating-point value `a'.
4483 *----------------------------------------------------------------------------*/
4484
4485 static inline uint64_t extractFloat128Frac1( float128 a )
4486 {
4487
4488 return a.low;
4489
4490 }
4491
4492 /*----------------------------------------------------------------------------
4493 | Returns the most-significant 48 fraction bits of the quadruple-precision
4494 | floating-point value `a'.
4495 *----------------------------------------------------------------------------*/
4496
4497 static inline uint64_t extractFloat128Frac0( float128 a )
4498 {
4499
4500 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4501
4502 }
4503
4504 /*----------------------------------------------------------------------------
4505 | Returns the exponent bits of the quadruple-precision floating-point value
4506 | `a'.
4507 *----------------------------------------------------------------------------*/
4508
4509 static inline int32_t extractFloat128Exp( float128 a )
4510 {
4511
4512 return ( a.high>>48 ) & 0x7FFF;
4513
4514 }
4515
4516 /*----------------------------------------------------------------------------
4517 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4518 *----------------------------------------------------------------------------*/
4519
4520 static inline bool extractFloat128Sign(float128 a)
4521 {
4522 return a.high >> 63;
4523 }
4524
4525 /*----------------------------------------------------------------------------
4526 | Normalizes the subnormal quadruple-precision floating-point value
4527 | represented by the denormalized significand formed by the concatenation of
4528 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4529 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4530 | significand are stored at the location pointed to by `zSig0Ptr', and the
4531 | least significant 64 bits of the normalized significand are stored at the
4532 | location pointed to by `zSig1Ptr'.
4533 *----------------------------------------------------------------------------*/
4534
4535 static void
4536 normalizeFloat128Subnormal(
4537 uint64_t aSig0,
4538 uint64_t aSig1,
4539 int32_t *zExpPtr,
4540 uint64_t *zSig0Ptr,
4541 uint64_t *zSig1Ptr
4542 )
4543 {
4544 int8_t shiftCount;
4545
4546 if ( aSig0 == 0 ) {
4547 shiftCount = clz64(aSig1) - 15;
4548 if ( shiftCount < 0 ) {
4549 *zSig0Ptr = aSig1>>( - shiftCount );
4550 *zSig1Ptr = aSig1<<( shiftCount & 63 );
4551 }
4552 else {
4553 *zSig0Ptr = aSig1<<shiftCount;
4554 *zSig1Ptr = 0;
4555 }
4556 *zExpPtr = - shiftCount - 63;
4557 }
4558 else {
4559 shiftCount = clz64(aSig0) - 15;
4560 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4561