target/arm: Convert Neon VCVT fixed-point to gvec
[qemu.git] / target / i386 / fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29
30 #ifdef CONFIG_SOFTMMU
31 #include "hw/irq.h"
32 #endif
33
/*
 * Rounding-control field of the x87 control word (env->fpuc) and its four
 * encodings.  update_fp_status() translates them into softfloat rounding
 * modes, and the constant-loading helpers use them to pick a rounding of
 * the constant.
 */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000 /* mapped to float_round_nearest_even */
#define FPU_RC_DOWN 0x400 /* mapped to float_round_down */
#define FPU_RC_UP 0x800 /* mapped to float_round_up */
#define FPU_RC_CHOP 0xc00 /* mapped to float_round_to_zero */

/* 2^63 as a double.  NOTE(review): not used in the visible part of the file. */
#define MAXTAN 9223372036854775808.0
41
/*
 * Field accessors for x86 80-bit extended-precision values.
 *
 * Each macro takes an lvalue whose .l.upper member holds the sign bit and
 * the 15-bit biased exponent and whose .l.lower member holds the 64-bit
 * significand.  The argument and the whole expansion are parenthesized so
 * that expressions such as *p or a[i] can be passed and the result can be
 * used inside a larger expression.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) ((fp).l.upper & 0x7fff)
#define SIGND(fp) ((fp).l.upper & 0x8000)
#define MANTD(fp) ((fp).l.lower)
/*
 * Overwrite the exponent field with the bias (i.e. unbiased exponent 0),
 * leaving the sign bit untouched.  Evaluates its argument twice.
 */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~0x7fff) | EXPBIAS)
49
/*
 * Bits of the x87 status word (env->fpus).  merge_exception_flags() sets
 * the low six from the softfloat flag named next to each of them.
 */
#define FPUS_IE (1 << 0) /* float_flag_invalid */
#define FPUS_DE (1 << 1) /* float_flag_input_denormal */
#define FPUS_ZE (1 << 2) /* float_flag_divbyzero */
#define FPUS_OE (1 << 3) /* float_flag_overflow */
#define FPUS_UE (1 << 4) /* float_flag_underflow */
#define FPUS_PE (1 << 5) /* float_flag_inexact */
#define FPUS_SF (1 << 6) /* NOTE(review): presumably stack fault; unused in the visible code */
#define FPUS_SE (1 << 7) /* set by fpu_set_exception() when an unmasked exception is pending */
#define FPUS_B (1 << 15) /* set together with FPUS_SE */

/* Exception mask bits of the control word (env->fpuc); same bit positions as FPUS_IE..FPUS_PE. */
#define FPUC_EM 0x3f
61
/*
 * Constants pushed by the FLD<constant> helpers.  A "_d" value is the one
 * those helpers select under FPU_RC_DOWN/FPU_RC_CHOP and a "_u" value the
 * one selected under FPU_RC_UP; where both variants are defined here they
 * differ by one in the last significand bit.  The default values
 * floatx80_ln2 and floatx80_pi are not defined in this part of the file.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
70
71 #if !defined(CONFIG_USER_ONLY)
/*
 * IRQ line stored by x86_register_ferr_irq(); raised by
 * fpu_raise_exception() and lowered by cpu_set_ignne().
 */
static qemu_irq ferr_irq;
73
74 void x86_register_ferr_irq(qemu_irq irq)
75 {
76 ferr_irq = irq;
77 }
78
79 static void cpu_clear_ignne(void)
80 {
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 &= ~HF2_IGNNE_MASK;
83 }
84
85 void cpu_set_ignne(void)
86 {
87 CPUX86State *env = &X86_CPU(first_cpu)->env;
88 env->hflags2 |= HF2_IGNNE_MASK;
89 /*
90 * We get here in response to a write to port F0h. The chipset should
91 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
92 * cleared, because FERR# and FP_IRQ are two separate pins on real
93 * hardware. However, we don't model FERR# as a qemu_irq, so we just
94 * do directly what the chipset would do, i.e. deassert FP_IRQ.
95 */
96 qemu_irq_lower(ferr_irq);
97 }
98 #endif
99
100
101 static inline void fpush(CPUX86State *env)
102 {
103 env->fpstt = (env->fpstt - 1) & 7;
104 env->fptags[env->fpstt] = 0; /* validate stack entry */
105 }
106
107 static inline void fpop(CPUX86State *env)
108 {
109 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
110 env->fpstt = (env->fpstt + 1) & 7;
111 }
112
113 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
114 uintptr_t retaddr)
115 {
116 CPU_LDoubleU temp;
117
118 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
119 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
120 return temp.d;
121 }
122
123 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
124 uintptr_t retaddr)
125 {
126 CPU_LDoubleU temp;
127
128 temp.d = f;
129 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
130 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
131 }
132
133 /* x87 FPU helpers */
134
135 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
136 {
137 union {
138 float64 f64;
139 double d;
140 } u;
141
142 u.f64 = floatx80_to_float64(a, &env->fp_status);
143 return u.d;
144 }
145
146 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
147 {
148 union {
149 float64 f64;
150 double d;
151 } u;
152
153 u.d = a;
154 return float64_to_floatx80(u.f64, &env->fp_status);
155 }
156
157 static void fpu_set_exception(CPUX86State *env, int mask)
158 {
159 env->fpus |= mask;
160 if (env->fpus & (~env->fpuc & FPUC_EM)) {
161 env->fpus |= FPUS_SE | FPUS_B;
162 }
163 }
164
165 static inline uint8_t save_exception_flags(CPUX86State *env)
166 {
167 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
168 set_float_exception_flags(0, &env->fp_status);
169 return old_flags;
170 }
171
172 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
173 {
174 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
175 float_raise(old_flags, &env->fp_status);
176 fpu_set_exception(env,
177 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
178 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
179 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
180 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
181 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
182 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
183 }
184
185 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
186 {
187 uint8_t old_flags = save_exception_flags(env);
188 floatx80 ret = floatx80_div(a, b, &env->fp_status);
189 merge_exception_flags(env, old_flags);
190 return ret;
191 }
192
193 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
194 {
195 if (env->cr[0] & CR0_NE_MASK) {
196 raise_exception_ra(env, EXCP10_COPR, retaddr);
197 }
198 #if !defined(CONFIG_USER_ONLY)
199 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
200 qemu_irq_raise(ferr_irq);
201 }
202 #endif
203 }
204
205 void helper_flds_FT0(CPUX86State *env, uint32_t val)
206 {
207 uint8_t old_flags = save_exception_flags(env);
208 union {
209 float32 f;
210 uint32_t i;
211 } u;
212
213 u.i = val;
214 FT0 = float32_to_floatx80(u.f, &env->fp_status);
215 merge_exception_flags(env, old_flags);
216 }
217
218 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
219 {
220 uint8_t old_flags = save_exception_flags(env);
221 union {
222 float64 f;
223 uint64_t i;
224 } u;
225
226 u.i = val;
227 FT0 = float64_to_floatx80(u.f, &env->fp_status);
228 merge_exception_flags(env, old_flags);
229 }
230
231 void helper_fildl_FT0(CPUX86State *env, int32_t val)
232 {
233 FT0 = int32_to_floatx80(val, &env->fp_status);
234 }
235
236 void helper_flds_ST0(CPUX86State *env, uint32_t val)
237 {
238 uint8_t old_flags = save_exception_flags(env);
239 int new_fpstt;
240 union {
241 float32 f;
242 uint32_t i;
243 } u;
244
245 new_fpstt = (env->fpstt - 1) & 7;
246 u.i = val;
247 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
248 env->fpstt = new_fpstt;
249 env->fptags[new_fpstt] = 0; /* validate stack entry */
250 merge_exception_flags(env, old_flags);
251 }
252
253 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
254 {
255 uint8_t old_flags = save_exception_flags(env);
256 int new_fpstt;
257 union {
258 float64 f;
259 uint64_t i;
260 } u;
261
262 new_fpstt = (env->fpstt - 1) & 7;
263 u.i = val;
264 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
265 env->fpstt = new_fpstt;
266 env->fptags[new_fpstt] = 0; /* validate stack entry */
267 merge_exception_flags(env, old_flags);
268 }
269
270 void helper_fildl_ST0(CPUX86State *env, int32_t val)
271 {
272 int new_fpstt;
273
274 new_fpstt = (env->fpstt - 1) & 7;
275 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
276 env->fpstt = new_fpstt;
277 env->fptags[new_fpstt] = 0; /* validate stack entry */
278 }
279
280 void helper_fildll_ST0(CPUX86State *env, int64_t val)
281 {
282 int new_fpstt;
283
284 new_fpstt = (env->fpstt - 1) & 7;
285 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
286 env->fpstt = new_fpstt;
287 env->fptags[new_fpstt] = 0; /* validate stack entry */
288 }
289
290 uint32_t helper_fsts_ST0(CPUX86State *env)
291 {
292 uint8_t old_flags = save_exception_flags(env);
293 union {
294 float32 f;
295 uint32_t i;
296 } u;
297
298 u.f = floatx80_to_float32(ST0, &env->fp_status);
299 merge_exception_flags(env, old_flags);
300 return u.i;
301 }
302
303 uint64_t helper_fstl_ST0(CPUX86State *env)
304 {
305 uint8_t old_flags = save_exception_flags(env);
306 union {
307 float64 f;
308 uint64_t i;
309 } u;
310
311 u.f = floatx80_to_float64(ST0, &env->fp_status);
312 merge_exception_flags(env, old_flags);
313 return u.i;
314 }
315
316 int32_t helper_fist_ST0(CPUX86State *env)
317 {
318 uint8_t old_flags = save_exception_flags(env);
319 int32_t val;
320
321 val = floatx80_to_int32(ST0, &env->fp_status);
322 if (val != (int16_t)val) {
323 set_float_exception_flags(float_flag_invalid, &env->fp_status);
324 val = -32768;
325 }
326 merge_exception_flags(env, old_flags);
327 return val;
328 }
329
330 int32_t helper_fistl_ST0(CPUX86State *env)
331 {
332 uint8_t old_flags = save_exception_flags(env);
333 int32_t val;
334
335 val = floatx80_to_int32(ST0, &env->fp_status);
336 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
337 val = 0x80000000;
338 }
339 merge_exception_flags(env, old_flags);
340 return val;
341 }
342
343 int64_t helper_fistll_ST0(CPUX86State *env)
344 {
345 uint8_t old_flags = save_exception_flags(env);
346 int64_t val;
347
348 val = floatx80_to_int64(ST0, &env->fp_status);
349 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
350 val = 0x8000000000000000ULL;
351 }
352 merge_exception_flags(env, old_flags);
353 return val;
354 }
355
356 int32_t helper_fistt_ST0(CPUX86State *env)
357 {
358 uint8_t old_flags = save_exception_flags(env);
359 int32_t val;
360
361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
362 if (val != (int16_t)val) {
363 set_float_exception_flags(float_flag_invalid, &env->fp_status);
364 val = -32768;
365 }
366 merge_exception_flags(env, old_flags);
367 return val;
368 }
369
370 int32_t helper_fisttl_ST0(CPUX86State *env)
371 {
372 uint8_t old_flags = save_exception_flags(env);
373 int32_t val;
374
375 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
376 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
377 val = 0x80000000;
378 }
379 merge_exception_flags(env, old_flags);
380 return val;
381 }
382
383 int64_t helper_fisttll_ST0(CPUX86State *env)
384 {
385 uint8_t old_flags = save_exception_flags(env);
386 int64_t val;
387
388 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
389 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
390 val = 0x8000000000000000ULL;
391 }
392 merge_exception_flags(env, old_flags);
393 return val;
394 }
395
396 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
397 {
398 int new_fpstt;
399
400 new_fpstt = (env->fpstt - 1) & 7;
401 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
402 env->fpstt = new_fpstt;
403 env->fptags[new_fpstt] = 0; /* validate stack entry */
404 }
405
406 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
407 {
408 helper_fstt(env, ST0, ptr, GETPC());
409 }
410
411 void helper_fpush(CPUX86State *env)
412 {
413 fpush(env);
414 }
415
416 void helper_fpop(CPUX86State *env)
417 {
418 fpop(env);
419 }
420
421 void helper_fdecstp(CPUX86State *env)
422 {
423 env->fpstt = (env->fpstt - 1) & 7;
424 env->fpus &= ~0x4700;
425 }
426
427 void helper_fincstp(CPUX86State *env)
428 {
429 env->fpstt = (env->fpstt + 1) & 7;
430 env->fpus &= ~0x4700;
431 }
432
433 /* FPU move */
434
435 void helper_ffree_STN(CPUX86State *env, int st_index)
436 {
437 env->fptags[(env->fpstt + st_index) & 7] = 1;
438 }
439
440 void helper_fmov_ST0_FT0(CPUX86State *env)
441 {
442 ST0 = FT0;
443 }
444
445 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
446 {
447 FT0 = ST(st_index);
448 }
449
450 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
451 {
452 ST0 = ST(st_index);
453 }
454
455 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
456 {
457 ST(st_index) = ST0;
458 }
459
460 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
461 {
462 floatx80 tmp;
463
464 tmp = ST(st_index);
465 ST(st_index) = ST0;
466 ST0 = tmp;
467 }
468
469 /* FPU operations */
470
/*
 * Status-word bits (within mask 0x4500) stored by the FCOM/FUCOM helpers,
 * indexed by the comparison result + 1.
 * NOTE(review): assumes FloatRelation runs from -1 (less) through 0
 * (equal) and 1 (greater) to 2 (unordered) - confirm against softfloat.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
472
473 void helper_fcom_ST0_FT0(CPUX86State *env)
474 {
475 uint8_t old_flags = save_exception_flags(env);
476 FloatRelation ret;
477
478 ret = floatx80_compare(ST0, FT0, &env->fp_status);
479 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
480 merge_exception_flags(env, old_flags);
481 }
482
483 void helper_fucom_ST0_FT0(CPUX86State *env)
484 {
485 uint8_t old_flags = save_exception_flags(env);
486 FloatRelation ret;
487
488 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
489 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
490 merge_exception_flags(env, old_flags);
491 }
492
/*
 * EFLAGS bits (within CC_Z | CC_P | CC_C) stored by the FCOMI/FUCOMI
 * helpers, indexed by the comparison result + 1.
 * NOTE(review): assumes FloatRelation runs from -1 (less) through 0
 * (equal) and 1 (greater) to 2 (unordered) - confirm against softfloat.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
494
495 void helper_fcomi_ST0_FT0(CPUX86State *env)
496 {
497 uint8_t old_flags = save_exception_flags(env);
498 int eflags;
499 FloatRelation ret;
500
501 ret = floatx80_compare(ST0, FT0, &env->fp_status);
502 eflags = cpu_cc_compute_all(env, CC_OP);
503 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
504 CC_SRC = eflags;
505 merge_exception_flags(env, old_flags);
506 }
507
508 void helper_fucomi_ST0_FT0(CPUX86State *env)
509 {
510 uint8_t old_flags = save_exception_flags(env);
511 int eflags;
512 FloatRelation ret;
513
514 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
515 eflags = cpu_cc_compute_all(env, CC_OP);
516 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
517 CC_SRC = eflags;
518 merge_exception_flags(env, old_flags);
519 }
520
521 void helper_fadd_ST0_FT0(CPUX86State *env)
522 {
523 uint8_t old_flags = save_exception_flags(env);
524 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
525 merge_exception_flags(env, old_flags);
526 }
527
528 void helper_fmul_ST0_FT0(CPUX86State *env)
529 {
530 uint8_t old_flags = save_exception_flags(env);
531 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
532 merge_exception_flags(env, old_flags);
533 }
534
535 void helper_fsub_ST0_FT0(CPUX86State *env)
536 {
537 uint8_t old_flags = save_exception_flags(env);
538 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
539 merge_exception_flags(env, old_flags);
540 }
541
542 void helper_fsubr_ST0_FT0(CPUX86State *env)
543 {
544 uint8_t old_flags = save_exception_flags(env);
545 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
546 merge_exception_flags(env, old_flags);
547 }
548
549 void helper_fdiv_ST0_FT0(CPUX86State *env)
550 {
551 ST0 = helper_fdiv(env, ST0, FT0);
552 }
553
554 void helper_fdivr_ST0_FT0(CPUX86State *env)
555 {
556 ST0 = helper_fdiv(env, FT0, ST0);
557 }
558
559 /* fp operations between STN and ST0 */
560
561 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
562 {
563 uint8_t old_flags = save_exception_flags(env);
564 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
565 merge_exception_flags(env, old_flags);
566 }
567
568 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
569 {
570 uint8_t old_flags = save_exception_flags(env);
571 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
572 merge_exception_flags(env, old_flags);
573 }
574
575 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
576 {
577 uint8_t old_flags = save_exception_flags(env);
578 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
579 merge_exception_flags(env, old_flags);
580 }
581
582 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
583 {
584 uint8_t old_flags = save_exception_flags(env);
585 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
586 merge_exception_flags(env, old_flags);
587 }
588
589 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
590 {
591 floatx80 *p;
592
593 p = &ST(st_index);
594 *p = helper_fdiv(env, *p, ST0);
595 }
596
597 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
598 {
599 floatx80 *p;
600
601 p = &ST(st_index);
602 *p = helper_fdiv(env, ST0, *p);
603 }
604
605 /* misc FPU operations */
606 void helper_fchs_ST0(CPUX86State *env)
607 {
608 ST0 = floatx80_chs(ST0);
609 }
610
611 void helper_fabs_ST0(CPUX86State *env)
612 {
613 ST0 = floatx80_abs(ST0);
614 }
615
616 void helper_fld1_ST0(CPUX86State *env)
617 {
618 ST0 = floatx80_one;
619 }
620
621 void helper_fldl2t_ST0(CPUX86State *env)
622 {
623 switch (env->fpuc & FPU_RC_MASK) {
624 case FPU_RC_UP:
625 ST0 = floatx80_l2t_u;
626 break;
627 default:
628 ST0 = floatx80_l2t;
629 break;
630 }
631 }
632
633 void helper_fldl2e_ST0(CPUX86State *env)
634 {
635 switch (env->fpuc & FPU_RC_MASK) {
636 case FPU_RC_DOWN:
637 case FPU_RC_CHOP:
638 ST0 = floatx80_l2e_d;
639 break;
640 default:
641 ST0 = floatx80_l2e;
642 break;
643 }
644 }
645
646 void helper_fldpi_ST0(CPUX86State *env)
647 {
648 switch (env->fpuc & FPU_RC_MASK) {
649 case FPU_RC_DOWN:
650 case FPU_RC_CHOP:
651 ST0 = floatx80_pi_d;
652 break;
653 default:
654 ST0 = floatx80_pi;
655 break;
656 }
657 }
658
659 void helper_fldlg2_ST0(CPUX86State *env)
660 {
661 switch (env->fpuc & FPU_RC_MASK) {
662 case FPU_RC_DOWN:
663 case FPU_RC_CHOP:
664 ST0 = floatx80_lg2_d;
665 break;
666 default:
667 ST0 = floatx80_lg2;
668 break;
669 }
670 }
671
672 void helper_fldln2_ST0(CPUX86State *env)
673 {
674 switch (env->fpuc & FPU_RC_MASK) {
675 case FPU_RC_DOWN:
676 case FPU_RC_CHOP:
677 ST0 = floatx80_ln2_d;
678 break;
679 default:
680 ST0 = floatx80_ln2;
681 break;
682 }
683 }
684
685 void helper_fldz_ST0(CPUX86State *env)
686 {
687 ST0 = floatx80_zero;
688 }
689
690 void helper_fldz_FT0(CPUX86State *env)
691 {
692 FT0 = floatx80_zero;
693 }
694
695 uint32_t helper_fnstsw(CPUX86State *env)
696 {
697 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
698 }
699
700 uint32_t helper_fnstcw(CPUX86State *env)
701 {
702 return env->fpuc;
703 }
704
705 void update_fp_status(CPUX86State *env)
706 {
707 int rnd_type;
708
709 /* set rounding mode */
710 switch (env->fpuc & FPU_RC_MASK) {
711 default:
712 case FPU_RC_NEAR:
713 rnd_type = float_round_nearest_even;
714 break;
715 case FPU_RC_DOWN:
716 rnd_type = float_round_down;
717 break;
718 case FPU_RC_UP:
719 rnd_type = float_round_up;
720 break;
721 case FPU_RC_CHOP:
722 rnd_type = float_round_to_zero;
723 break;
724 }
725 set_float_rounding_mode(rnd_type, &env->fp_status);
726 switch ((env->fpuc >> 8) & 3) {
727 case 0:
728 rnd_type = 32;
729 break;
730 case 2:
731 rnd_type = 64;
732 break;
733 case 3:
734 default:
735 rnd_type = 80;
736 break;
737 }
738 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
739 }
740
741 void helper_fldcw(CPUX86State *env, uint32_t val)
742 {
743 cpu_set_fpuc(env, val);
744 }
745
746 void helper_fclex(CPUX86State *env)
747 {
748 env->fpus &= 0x7f00;
749 }
750
751 void helper_fwait(CPUX86State *env)
752 {
753 if (env->fpus & FPUS_SE) {
754 fpu_raise_exception(env, GETPC());
755 }
756 }
757
758 void helper_fninit(CPUX86State *env)
759 {
760 env->fpus = 0;
761 env->fpstt = 0;
762 cpu_set_fpuc(env, 0x37f);
763 env->fptags[0] = 1;
764 env->fptags[1] = 1;
765 env->fptags[2] = 1;
766 env->fptags[3] = 1;
767 env->fptags[4] = 1;
768 env->fptags[5] = 1;
769 env->fptags[6] = 1;
770 env->fptags[7] = 1;
771 }
772
773 /* BCD ops */
774
775 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
776 {
777 floatx80 tmp;
778 uint64_t val;
779 unsigned int v;
780 int i;
781
782 val = 0;
783 for (i = 8; i >= 0; i--) {
784 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
785 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
786 }
787 tmp = int64_to_floatx80(val, &env->fp_status);
788 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
789 tmp = floatx80_chs(tmp);
790 }
791 fpush(env);
792 ST0 = tmp;
793 }
794
795 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
796 {
797 uint8_t old_flags = save_exception_flags(env);
798 int v;
799 target_ulong mem_ref, mem_end;
800 int64_t val;
801 CPU_LDoubleU temp;
802
803 temp.d = ST0;
804
805 val = floatx80_to_int64(ST0, &env->fp_status);
806 mem_ref = ptr;
807 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
808 set_float_exception_flags(float_flag_invalid, &env->fp_status);
809 while (mem_ref < ptr + 7) {
810 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
811 }
812 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
813 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
814 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
815 merge_exception_flags(env, old_flags);
816 return;
817 }
818 mem_end = mem_ref + 9;
819 if (SIGND(temp)) {
820 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
821 val = -val;
822 } else {
823 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
824 }
825 while (mem_ref < mem_end) {
826 if (val == 0) {
827 break;
828 }
829 v = val % 100;
830 val = val / 100;
831 v = ((v / 10) << 4) | (v % 10);
832 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
833 }
834 while (mem_ref < mem_end) {
835 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
836 }
837 merge_exception_flags(env, old_flags);
838 }
839
/*
 * 128-bit significand of log(2), split into two 64-bit halves.  The high
 * half has the same bits as the significand of floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
843
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].  Coefficient N multiplies x^N.
 * NOTE(review): f2xm1_coeff_0_low looks like a low-order correction term
 * for f2xm1_coeff_0 (its exponent is far smaller) - confirm against the
 * code that uses it, which is outside this part of the file.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
857
/* One row of f2xm1_table: a sample point t with precomputed 2^t and 2^t - 1. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
869
/*
 * Sample points for F2XM1, ordered by increasing t.  Entry i holds a t
 * very close to (i - 32) / 32, so the table covers [-1, 1] in steps of
 * about 1/32; entry 32 is t = 0.  Each row is { t, 2^t, 2^t - 1 }.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    /* t = 0 */
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
1067
1068 void helper_f2xm1(CPUX86State *env)
1069 {
1070 uint8_t old_flags = save_exception_flags(env);
1071 uint64_t sig = extractFloatx80Frac(ST0);
1072 int32_t exp = extractFloatx80Exp(ST0);
1073 bool sign = extractFloatx80Sign(ST0);
1074
1075 if (floatx80_invalid_encoding(ST0)) {
1076 float_raise(float_flag_invalid, &env->fp_status);
1077 ST0 = floatx80_default_nan(&env->fp_status);
1078 } else if (floatx80_is_any_nan(ST0)) {
1079 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1080 float_raise(float_flag_invalid, &env->fp_status);
1081 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1082 }
1083 } else if (exp > 0x3fff ||
1084 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1085 /* Out of range for the instruction, treat as invalid. */
1086 float_raise(float_flag_invalid, &env->fp_status);
1087 ST0 = floatx80_default_nan(&env->fp_status);
1088 } else if (exp == 0x3fff) {
1089 /* Argument 1 or -1, exact result 1 or -0.5. */
1090 if (sign) {
1091 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1092 }
1093 } else if (exp < 0x3fb0) {
1094 if (!floatx80_is_zero(ST0)) {
1095 /*
1096 * Multiplying the argument by an extra-precision version
1097 * of log(2) is sufficiently precise. Zero arguments are
1098 * returned unchanged.
1099 */
1100 uint64_t sig0, sig1, sig2;
1101 if (exp == 0) {
1102 normalizeFloatx80Subnormal(sig, &exp, &sig);
1103 }
1104 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1105 &sig2);
1106 /* This result is inexact. */
1107 sig1 |= 1;
1108 ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
1109 &env->fp_status);
1110 }
1111 } else {
1112 floatx80 tmp, y, accum;
1113 bool asign, bsign;
1114 int32_t n, aexp, bexp;
1115 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1116 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1117 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1118 env->fp_status.float_rounding_mode = float_round_nearest_even;
1119 env->fp_status.floatx80_rounding_precision = 80;
1120
1121 /* Find the nearest multiple of 1/32 to the argument. */
1122 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1123 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1124 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1125
1126 if (floatx80_is_zero(y)) {
1127 /*
1128 * Use the value of 2^t - 1 from the table, to avoid
1129 * needing to special-case zero as a result of
1130 * multiplication below.
1131 */
1132 ST0 = f2xm1_table[n].t;
1133 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1134 env->fp_status.float_rounding_mode = save_mode;
1135 } else {
1136 /*
1137 * Compute the lower parts of a polynomial expansion for
1138 * (2^y - 1) / y.
1139 */
1140 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1141 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1142 accum = floatx80_mul(accum, y, &env->fp_status);
1143 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1144 accum = floatx80_mul(accum, y, &env->fp_status);
1145 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1146 accum = floatx80_mul(accum, y, &env->fp_status);
1147 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1148 accum = floatx80_mul(accum, y, &env->fp_status);
1149 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1150 accum = floatx80_mul(accum, y, &env->fp_status);
1151 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1152 accum = floatx80_mul(accum, y, &env->fp_status);
1153 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1154
1155 /*
1156 * The full polynomial expansion is f2xm1_coeff_0 + accum
1157 * (where accum has much lower magnitude, and so, in
1158 * particular, carry out of the addition is not possible).
1159 * (This expansion is only accurate to about 70 bits, not
1160 * 128 bits.)
1161 */
1162 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1163 asign = extractFloatx80Sign(f2xm1_coeff_0);
1164 shift128RightJamming(extractFloatx80Frac(accum), 0,
1165 aexp - extractFloatx80Exp(accum),
1166 &asig0, &asig1);
1167 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1168 bsig1 = 0;
1169 if (asign == extractFloatx80Sign(accum)) {
1170 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1171 } else {
1172 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1173 }
1174 /* And thus compute an approximation to 2^y - 1. */
1175 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1176 &asig0, &asig1, &asig2);
1177 aexp += extractFloatx80Exp(y) - 0x3ffe;
1178 asign ^= extractFloatx80Sign(y);
1179 if (n != 32) {
1180 /*
1181 * Multiply this by the precomputed value of 2^t and
1182 * add that of 2^t - 1.
1183 */
1184 mul128By64To192(asig0, asig1,
1185 extractFloatx80Frac(f2xm1_table[n].exp2),
1186 &asig0, &asig1, &asig2);
1187 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1188 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1189 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1190 bsig1 = 0;
1191 if (bexp < aexp) {
1192 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1193 &bsig0, &bsig1);
1194 } else if (aexp < bexp) {
1195 shift128RightJamming(asig0, asig1, bexp - aexp,
1196 &asig0, &asig1);
1197 aexp = bexp;
1198 }
1199 /* The sign of 2^t - 1 is always that of the result. */
1200 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1201 if (asign == bsign) {
1202 /* Avoid possible carry out of the addition. */
1203 shift128RightJamming(asig0, asig1, 1,
1204 &asig0, &asig1);
1205 shift128RightJamming(bsig0, bsig1, 1,
1206 &bsig0, &bsig1);
1207 ++aexp;
1208 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1209 } else {
1210 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1211 asign = bsign;
1212 }
1213 }
1214 env->fp_status.float_rounding_mode = save_mode;
1215 /* This result is inexact. */
1216 asig1 |= 1;
1217 ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
1218 &env->fp_status);
1219 }
1220
1221 env->fp_status.floatx80_rounding_precision = save_prec;
1222 }
1223 merge_exception_flags(env, old_flags);
1224 }
1225
1226 void helper_fptan(CPUX86State *env)
1227 {
1228 double fptemp = floatx80_to_double(env, ST0);
1229
1230 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1231 env->fpus |= 0x400;
1232 } else {
1233 fptemp = tan(fptemp);
1234 ST0 = double_to_floatx80(env, fptemp);
1235 fpush(env);
1236 ST0 = floatx80_one;
1237 env->fpus &= ~0x400; /* C2 <-- 0 */
1238 /* the above code is for |arg| < 2**52 only */
1239 }
1240 }
1241
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is given as an exponent (*_exp) plus a 128-bit
 * significand split into two 64-bit halves (*_sig_high, *_sig_low), in
 * the form that helper_fpatan passes to normalizeRoundAndPackFloatx80().
 * pi/4, pi/2 and pi share the same significand and differ only in
 * their exponent.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL
1255
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * helper_fpatan evaluates coefficients 6 down to 1 by Horner's rule in
 * z^2 and then forms z * (fpatan_coeff_0 + accum). The first argument
 * of make_floatx80() carries the sign in bit 0x8000, so the
 * coefficients alternate in sign.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1271
/*
 * One entry of the arctangent lookup table used by helper_fpatan.
 * The value is stored as an unevaluated sum atan_high + atan_low so
 * that it carries more precision than a single floatx80.
 */
struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};
1276
/*
 * atan(n/8) for n = 0..8, indexed by n. helper_fpatan picks n as the
 * nearest multiple of 1/8 to its reduced argument and looks up
 * fpatan_table[n]. Each row is { atan_high, atan_low }; the last row
 * (n = 8) is atan(1) = pi/4.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
1297
/*
 * FPATAN: compute the arctangent of ST1 / ST0, with the quadrant chosen
 * from the signs of both operands, store it in ST1 and then call
 * fpop().
 *
 * Outline:
 *  - NaNs and invalid encodings are handled first;
 *  - a zero ST1 with non-negative ST0 is returned unchanged;
 *  - when ST0 is positive and vastly larger than ST1 the result is
 *    ST1 / ST0 up to rounding;
 *  - operands that are zero, infinite or very far apart in exponent map
 *    to fixed multiples of pi/4 carrying the sign of ST1;
 *  - otherwise x = min(|ST0|, |ST1|) / max(|ST0|, |ST1|) is split as
 *    t + y with t = n/8, atan(t) comes from fpatan_table, atan(z) for
 *    z = y / (1 + t*x) comes from a polynomial, and the sum is added to
 *    or subtracted from 0, pi/2 or pi as the octant requires.
 *
 * Exception flags raised while computing are folded back into env by
 * merge_exception_flags() at the end.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    /* Signaling NaNs take priority; ST0 is checked before ST1. */
    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = 80;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result. Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            /* sig - 1 with an all-ones low word: just below sig. */
            ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative. The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                /* Both infinite: 3pi/4 or pi/4 by the sign of ST0. */
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            /* ST1 dominates: the result is pi/2 up to rounding. */
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            /*
             * Intermediate floatx80 operations run in round-to-nearest
             * with 80-bit precision; the guest's settings are restored
             * before the final rounding below.
             */
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            signed char save_prec = env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = 80;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /*
             * Put the smaller magnitude in the numerator so that the
             * quotient is at most 1, and record in adj_* what the
             * arctangent of that quotient must be combined with.
             */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct an over-estimated quotient word. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            /* Adding 3 to the exponent scales x by 8 before rounding. */
            x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                /* t = 0 (texp == 0 marks this), so y is x itself. */
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                /* Normalize n so that its top set bit is bit 63 of tsig. */
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* y is exactly zero; yexp == 0 marks it. */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        /* x < t: negate to get the magnitude of y. */
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero, so z is simply y. */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit). Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct an over-estimated quotient word. */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) contributes nothing. */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(80, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                /* Only the top 128 bits (azsig0, azsig1) are used later. */
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold atan_low into atan_high at atan_high's exponent. */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Align both terms one bit below the larger exponent
                 * before combining them.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                /* No adjustment: arctan(x) is the result. */
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}
1730
1731 void helper_fxtract(CPUX86State *env)
1732 {
1733 uint8_t old_flags = save_exception_flags(env);
1734 CPU_LDoubleU temp;
1735
1736 temp.d = ST0;
1737
1738 if (floatx80_is_zero(ST0)) {
1739 /* Easy way to generate -inf and raising division by 0 exception */
1740 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1741 &env->fp_status);
1742 fpush(env);
1743 ST0 = temp.d;
1744 } else if (floatx80_invalid_encoding(ST0)) {
1745 float_raise(float_flag_invalid, &env->fp_status);
1746 ST0 = floatx80_default_nan(&env->fp_status);
1747 fpush(env);
1748 ST0 = ST1;
1749 } else if (floatx80_is_any_nan(ST0)) {
1750 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1751 float_raise(float_flag_invalid, &env->fp_status);
1752 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1753 }
1754 fpush(env);
1755 ST0 = ST1;
1756 } else if (floatx80_is_infinity(ST0)) {
1757 fpush(env);
1758 ST0 = ST1;
1759 ST1 = floatx80_infinity;
1760 } else {
1761 int expdif;
1762
1763 if (EXPD(temp) == 0) {
1764 int shift = clz64(temp.l.lower);
1765 temp.l.lower <<= shift;
1766 expdif = 1 - EXPBIAS - shift;
1767 float_raise(float_flag_input_denormal, &env->fp_status);
1768 } else {
1769 expdif = EXPD(temp) - EXPBIAS;
1770 }
1771 /* DP exponent bias */
1772 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1773 fpush(env);
1774 BIASEXPONENT(temp);
1775 ST0 = temp.d;
1776 }
1777 merge_exception_flags(env, old_flags);
1778 }
1779
1780 static void helper_fprem_common(CPUX86State *env, bool mod)
1781 {
1782 uint8_t old_flags = save_exception_flags(env);
1783 uint64_t quotient;
1784 CPU_LDoubleU temp0, temp1;
1785 int exp0, exp1, expdiff;
1786
1787 temp0.d = ST0;
1788 temp1.d = ST1;
1789 exp0 = EXPD(temp0);
1790 exp1 = EXPD(temp1);
1791
1792 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1793 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1794 exp0 == 0x7fff || exp1 == 0x7fff ||
1795 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1796 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1797 } else {
1798 if (exp0 == 0) {
1799 exp0 = 1 - clz64(temp0.l.lower);
1800 }
1801 if (exp1 == 0) {
1802 exp1 = 1 - clz64(temp1.l.lower);
1803 }
1804 expdiff = exp0 - exp1;
1805 if (expdiff < 64) {
1806 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1807 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
1808 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1809 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
1810 } else {
1811 /*
1812 * Partial remainder. This choice of how many bits to
1813 * process at once is specified in AMD instruction set
1814 * manuals, and empirically is followed by Intel
1815 * processors as well; it ensures that the final remainder
1816 * operation in a loop does produce the correct low three
1817 * bits of the quotient. AMD manuals specify that the
1818 * flags other than C2 are cleared, and empirically Intel
1819 * processors clear them as well.
1820 */
1821 int n = 32 + (expdiff % 32);
1822 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1823 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1824 env->fpus |= 0x400; /* C2 <-- 1 */
1825 }
1826 }
1827 merge_exception_flags(env, old_flags);
1828 }
1829
/*
 * FPREM1 helper: runs the shared remainder code with mod == false.
 * NOTE(review): presumably this selects the round-to-nearest (IEEE)
 * remainder in floatx80_modrem() - confirm against softfloat.
 */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}
1834
/*
 * FPREM helper: runs the shared remainder code with mod == true.
 * NOTE(review): presumably this selects the truncating remainder in
 * floatx80_modrem() - confirm against softfloat.
 */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
1839
/*
 * 128-bit significand of log2(e), split into high and low 64-bit
 * halves.
 */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * helper_fyl2x_common evaluates coefficients 9 down to 1 by Horner's
 * rule in the squared argument, adds fyl2x_coeff_0_low, and only then
 * combines the sum with fyl2x_coeff_0 using 128-bit arithmetic, so the
 * leading coefficient is effectively fyl2x_coeff_0 + fyl2x_coeff_0_low.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1861
1862 /*
1863 * Compute an approximation of log2(1+arg), where 1+arg is in the
1864 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1865 * function is called, rounding precision is set to 80 and the
1866 * round-to-nearest mode is in effect. arg must not be exactly zero,
1867 * and must not be so close to zero that underflow might occur.
1868 */
1869 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1870 uint64_t *sig0, uint64_t *sig1)
1871 {
1872 uint64_t arg0_sig = extractFloatx80Frac(arg);
1873 int32_t arg0_exp = extractFloatx80Exp(arg);
1874 bool arg0_sign = extractFloatx80Sign(arg);
1875 bool asign;
1876 int32_t dexp, texp, aexp;
1877 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1878 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1879 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1880 floatx80 t2, accum;
1881
1882 /*
1883 * Compute an approximation of arg/(2+arg), with extra precision,
1884 * as the argument to a polynomial approximation. The extra
1885 * precision is only needed for the first term of the
1886 * approximation, with subsequent terms being significantly
1887 * smaller; the approximation only uses odd exponents, and the
1888 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1889 */
1890 if (arg0_sign) {
1891 dexp = 0x3fff;
1892 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1893 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1894 } else {
1895 dexp = 0x4000;
1896 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1897 dsig0 |= 0x8000000000000000ULL;
1898 }
1899 texp = arg0_exp - dexp + 0x3ffe;
1900 rsig0 = arg0_sig;
1901 rsig1 = 0;
1902 rsig2 = 0;
1903 if (dsig0 <= rsig0) {
1904 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1905 ++texp;
1906 }
1907 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1908 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1909 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1910 &rsig0, &rsig1, &rsig2);
1911 while ((int64_t) rsig0 < 0) {
1912 --tsig0;
1913 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1914 &rsig0, &rsig1, &rsig2);
1915 }
1916 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1917 /*
1918 * No need to correct any estimation error in tsig1; even with
1919 * such error, it is accurate enough. Now compute the square of
1920 * that approximation.
1921 */
1922 mul128To256(tsig0, tsig1, tsig0, tsig1,
1923 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1924 t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
1925 t2sig0, t2sig1, &env->fp_status);
1926
1927 /* Compute the lower parts of the polynomial expansion. */
1928 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1929 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1930 accum = floatx80_mul(accum, t2, &env->fp_status);
1931 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1932 accum = floatx80_mul(accum, t2, &env->fp_status);
1933 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1934 accum = floatx80_mul(accum, t2, &env->fp_status);
1935 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1936 accum = floatx80_mul(accum, t2, &env->fp_status);
1937 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1938 accum = floatx80_mul(accum, t2, &env->fp_status);
1939 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1940 accum = floatx80_mul(accum, t2, &env->fp_status);
1941 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1942 accum = floatx80_mul(accum, t2, &env->fp_status);
1943 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1944 accum = floatx80_mul(accum, t2, &env->fp_status);
1945 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1946
1947 /*
1948 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1949 * accum has much lower magnitude, and so, in particular, carry
1950 * out of the addition is not possible), multiplied by t. (This
1951 * expansion is only accurate to about 70 bits, not 128 bits.)
1952 */
1953 aexp = extractFloatx80Exp(fyl2x_coeff_0);
1954 asign = extractFloatx80Sign(fyl2x_coeff_0);
1955 shift128RightJamming(extractFloatx80Frac(accum), 0,
1956 aexp - extractFloatx80Exp(accum),
1957 &asig0, &asig1);
1958 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1959 bsig1 = 0;
1960 if (asign == extractFloatx80Sign(accum)) {
1961 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1962 } else {
1963 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1964 }
1965 /* Multiply by t to compute the required result. */
1966 mul128To256(asig0, asig1, tsig0, tsig1,
1967 &asig0, &asig1, &asig2, &asig3);
1968 aexp += texp - 0x3ffe;
1969 *exp = aexp;
1970 *sig0 = asig0;
1971 *sig1 = asig1;
1972 }
1973
1974 void helper_fyl2xp1(CPUX86State *env)
1975 {
1976 uint8_t old_flags = save_exception_flags(env);
1977 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1978 int32_t arg0_exp = extractFloatx80Exp(ST0);
1979 bool arg0_sign = extractFloatx80Sign(ST0);
1980 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1981 int32_t arg1_exp = extractFloatx80Exp(ST1);
1982 bool arg1_sign = extractFloatx80Sign(ST1);
1983
1984 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1985 float_raise(float_flag_invalid, &env->fp_status);
1986 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1987 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1988 float_raise(float_flag_invalid, &env->fp_status);
1989 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1990 } else if (floatx80_invalid_encoding(ST0) ||
1991 floatx80_invalid_encoding(ST1)) {
1992 float_raise(float_flag_invalid, &env->fp_status);
1993 ST1 = floatx80_default_nan(&env->fp_status);
1994 } else if (floatx80_is_any_nan(ST0)) {
1995 ST1 = ST0;
1996 } else if (floatx80_is_any_nan(ST1)) {
1997 /* Pass this NaN through. */
1998 } else if (arg0_exp > 0x3ffd ||
1999 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
2000 0x95f619980c4336f7ULL :
2001 0xd413cccfe7799211ULL))) {
2002 /*
2003 * Out of range for the instruction (ST0 must have absolute
2004 * value less than 1 - sqrt(2)/2 = 0.292..., according to
2005 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2006 * to sqrt(2) - 1, which we allow here), treat as invalid.
2007 */
2008 float_raise(float_flag_invalid, &env->fp_status);
2009 ST1 = floatx80_default_nan(&env->fp_status);
2010 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
2011 arg1_exp == 0x7fff) {
2012 /*
2013 * One argument is zero, or multiplying by infinity; correct
2014 * result is exact and can be obtained by multiplying the
2015 * arguments.
2016 */
2017 ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
2018 } else if (arg0_exp < 0x3fb0) {
2019 /*
2020 * Multiplying both arguments and an extra-precision version
2021 * of log2(e) is sufficiently precise.
2022 */
2023 uint64_t sig0, sig1, sig2;
2024 int32_t exp;
2025 if (arg0_exp == 0) {
2026 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2027 }
2028 if (arg1_exp == 0) {
2029 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2030 }
2031 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2032 &sig0, &sig1, &sig2);
2033 exp = arg0_exp + 1;
2034 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2035 exp += arg1_exp - 0x3ffe;
2036 /* This result is inexact. */
2037 sig1 |= 1;
2038 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
2039 sig0, sig1, &env->fp_status);
2040 } else {
2041 int32_t aexp;
2042 uint64_t asig0, asig1, asig2;
2043 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2044 signed char save_prec = env->fp_status.floatx80_rounding_precision;
2045 env->fp_status.float_rounding_mode = float_round_nearest_even;
2046 env->fp_status.floatx80_rounding_precision = 80;
2047
2048 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2049 /*
2050 * Multiply by the second argument to compute the required
2051 * result.
2052 */
2053 if (arg1_exp == 0) {
2054 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2055 }
2056 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2057 aexp += arg1_exp - 0x3ffe;
2058 /* This result is inexact. */
2059 asig1 |= 1;
2060 env->fp_status.float_rounding_mode = save_mode;
2061 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
2062 asig0, asig1, &env->fp_status);
2063 env->fp_status.floatx80_rounding_precision = save_prec;
2064 }
2065 fpop(env);
2066 merge_exception_flags(env, old_flags);
2067 }
2068
2069 void helper_fyl2x(CPUX86State *env)
2070 {
2071 uint8_t old_flags = save_exception_flags(env);
2072 uint64_t arg0_sig = extractFloatx80Frac(ST0);
2073 int32_t arg0_exp = extractFloatx80Exp(ST0);
2074 bool arg0_sign = extractFloatx80Sign(ST0);
2075 uint64_t arg1_sig = extractFloatx80Frac(ST1);
2076 int32_t arg1_exp = extractFloatx80Exp(ST1);
2077 bool arg1_sign = extractFloatx80Sign(ST1);
2078
2079 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2080 float_raise(float_flag_invalid, &env->fp_status);
2081 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2082 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2083 float_raise(float_flag_invalid, &env->fp_status);
2084 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2085 } else if (floatx80_invalid_encoding(ST0) ||
2086 floatx80_invalid_encoding(ST1)) {
2087 float_raise(float_flag_invalid, &env->fp_status);
2088 ST1 = floatx80_default_nan(&env->fp_status);
2089 } else if (floatx80_is_any_nan(ST0)) {
2090 ST1 = ST0;
2091 } else if (floatx80_is_any_nan(ST1)) {
2092 /* Pass this NaN through. */
2093 } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2094 float_raise(float_flag_invalid, &env->fp_status);
2095 ST1 = floatx80_default_nan(&env->fp_status);
2096 } else if (floatx80_is_infinity(ST1)) {
2097 FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2098 &env->fp_status);
2099 switch (cmp) {
2100 case float_relation_less:
2101 ST1 = floatx80_chs(ST1);
2102 break;
2103 case float_relation_greater:
2104 /* Result is infinity of the same sign as ST1. */
2105 break;
2106 default:
2107 float_raise(float_flag_invalid, &env->fp_status);
2108 ST1 = floatx80_default_nan(&env->fp_status);
2109 break;
2110 }
2111 } else if (floatx80_is_infinity(ST0)) {
2112 if (floatx80_is_zero(ST1)) {
2113 float_raise(float_flag_invalid, &env->fp_status);
2114 ST1 = floatx80_default_nan(&env->fp_status);
2115 } else if (arg1_sign) {
2116 ST1 = floatx80_chs(ST0);
2117 } else {
2118 ST1 = ST0;
2119 }
2120 } else if (floatx80_is_zero(ST0)) {
2121 if (floatx80_is_zero(ST1)) {
2122 float_raise(float_flag_invalid, &env->fp_status);
2123 ST1 = floatx80_default_nan(&env->fp_status);
2124 } else {
2125 /* Result is infinity with opposite sign to ST1. */
2126 float_raise(float_flag_divbyzero, &env->fp_status);
2127 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2128 0x8000000000000000ULL);
2129 }
2130 } else if (floatx80_is_zero(ST1)) {
2131 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2132 ST1 = floatx80_chs(ST1);
2133 }
2134 /* Otherwise, ST1 is already the correct result. */
2135 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2136 if (arg1_sign) {
2137 ST1 = floatx80_chs(floatx80_zero);
2138 } else {
2139 ST1 = floatx80_zero;
2140 }
2141 } else {
2142 int32_t int_exp;
2143 floatx80 arg0_m1;
2144 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2145 signed char save_prec = env->fp_status.floatx80_rounding_precision;
2146 env->fp_status.float_rounding_mode = float_round_nearest_even;
2147 env->fp_status.floatx80_rounding_precision = 80;
2148
2149 if (arg0_exp == 0) {
2150 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2151 }
2152 if (arg1_exp == 0) {
2153 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2154 }
2155 int_exp = arg0_exp - 0x3fff;
2156 if (arg0_sig > 0xb504f333f9de6484ULL) {
2157 ++int_exp;
2158 }
2159 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2160 &env->fp_status),
2161 floatx80_one, &env->fp_status);
2162 if (floatx80_is_zero(arg0_m1)) {
2163 /* Exact power of 2; multiply by ST1. */
2164 env->fp_status.float_rounding_mode = save_mode;
2165 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2166 ST1, &env->fp_status);
2167 } else {
2168 bool asign = extractFloatx80Sign(arg0_m1);
2169 int32_t aexp;
2170 uint64_t asig0, asig1, asig2;
2171 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2172 if (int_exp != 0) {
2173 bool isign = (int_exp < 0);
2174 int32_t iexp;
2175 uint64_t isig;
2176 int shift;
2177 int_exp = isign ? -int_exp : int_exp;
2178 shift = clz32(int_exp) + 32;
2179 isig = int_exp;
2180 isig <<= shift;
2181 iexp = 0x403e - shift;
2182 shift128RightJamming(asig0, asig1, iexp - aexp,
2183 &asig0, &asig1);
2184 if (asign == isign) {
2185 add128(isig, 0, asig0, asig1, &asig0, &asig1);
2186 } else {
2187 sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2188 }
2189 aexp = iexp;
2190 asign = isign;
2191 }
2192 /*
2193 * Multiply by the second argument to compute the required
2194 * result.
2195 */
2196 if (arg1_exp == 0) {
2197 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2198 }
2199 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2200 aexp += arg1_exp - 0x3ffe;
2201 /* This result is inexact. */
2202 asig1 |= 1;
2203 env->fp_status.float_rounding_mode = save_mode;
2204 ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
2205 asig0, asig1, &env->fp_status);
2206 }
2207
2208 env->fp_status.floatx80_rounding_precision = save_prec;
2209 }
2210 fpop(env);
2211 merge_exception_flags(env, old_flags);
2212 }
2213
2214 void helper_fsqrt(CPUX86State *env)
2215 {
2216 uint8_t old_flags = save_exception_flags(env);
2217 if (floatx80_is_neg(ST0)) {
2218 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2219 env->fpus |= 0x400;
2220 }
2221 ST0 = floatx80_sqrt(ST0, &env->fp_status);
2222 merge_exception_flags(env, old_flags);
2223 }
2224
2225 void helper_fsincos(CPUX86State *env)
2226 {
2227 double fptemp = floatx80_to_double(env, ST0);
2228
2229 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2230 env->fpus |= 0x400;
2231 } else {
2232 ST0 = double_to_floatx80(env, sin(fptemp));
2233 fpush(env);
2234 ST0 = double_to_floatx80(env, cos(fptemp));
2235 env->fpus &= ~0x400; /* C2 <-- 0 */
2236 /* the above code is for |arg| < 2**63 only */
2237 }
2238 }
2239
2240 void helper_frndint(CPUX86State *env)
2241 {
2242 uint8_t old_flags = save_exception_flags(env);
2243 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2244 merge_exception_flags(env, old_flags);
2245 }
2246
2247 void helper_fscale(CPUX86State *env)
2248 {
2249 uint8_t old_flags = save_exception_flags(env);
2250 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2251 float_raise(float_flag_invalid, &env->fp_status);
2252 ST0 = floatx80_default_nan(&env->fp_status);
2253 } else if (floatx80_is_any_nan(ST1)) {
2254 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2255 float_raise(float_flag_invalid, &env->fp_status);
2256 }
2257 ST0 = ST1;
2258 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2259 float_raise(float_flag_invalid, &env->fp_status);
2260 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2261 }
2262 } else if (floatx80_is_infinity(ST1) &&
2263 !floatx80_invalid_encoding(ST0) &&
2264 !floatx80_is_any_nan(ST0)) {
2265 if (floatx80_is_neg(ST1)) {
2266 if (floatx80_is_infinity(ST0)) {
2267 float_raise(float_flag_invalid, &env->fp_status);
2268 ST0 = floatx80_default_nan(&env->fp_status);
2269 } else {
2270 ST0 = (floatx80_is_neg(ST0) ?
2271 floatx80_chs(floatx80_zero) :
2272 floatx80_zero);
2273 }
2274 } else {
2275 if (floatx80_is_zero(ST0)) {
2276 float_raise(float_flag_invalid, &env->fp_status);
2277 ST0 = floatx80_default_nan(&env->fp_status);
2278 } else {
2279 ST0 = (floatx80_is_neg(ST0) ?
2280 floatx80_chs(floatx80_infinity) :
2281 floatx80_infinity);
2282 }
2283 }
2284 } else {
2285 int n;
2286 signed char save = env->fp_status.floatx80_rounding_precision;
2287 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2288 set_float_exception_flags(0, &env->fp_status);
2289 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2290 set_float_exception_flags(save_flags, &env->fp_status);
2291 env->fp_status.floatx80_rounding_precision = 80;
2292 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2293 env->fp_status.floatx80_rounding_precision = save;
2294 }
2295 merge_exception_flags(env, old_flags);
2296 }
2297
2298 void helper_fsin(CPUX86State *env)
2299 {
2300 double fptemp = floatx80_to_double(env, ST0);
2301
2302 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2303 env->fpus |= 0x400;
2304 } else {
2305 ST0 = double_to_floatx80(env, sin(fptemp));
2306 env->fpus &= ~0x400; /* C2 <-- 0 */
2307 /* the above code is for |arg| < 2**53 only */
2308 }
2309 }
2310
2311 void helper_fcos(CPUX86State *env)
2312 {
2313 double fptemp = floatx80_to_double(env, ST0);
2314
2315 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2316 env->fpus |= 0x400;
2317 } else {
2318 ST0 = double_to_floatx80(env, cos(fptemp));
2319 env->fpus &= ~0x400; /* C2 <-- 0 */
2320 /* the above code is for |arg| < 2**63 only */
2321 }
2322 }
2323
2324 void helper_fxam_ST0(CPUX86State *env)
2325 {
2326 CPU_LDoubleU temp;
2327 int expdif;
2328
2329 temp.d = ST0;
2330
2331 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2332 if (SIGND(temp)) {
2333 env->fpus |= 0x200; /* C1 <-- 1 */
2334 }
2335
2336 if (env->fptags[env->fpstt]) {
2337 env->fpus |= 0x4100; /* Empty */
2338 return;
2339 }
2340
2341 expdif = EXPD(temp);
2342 if (expdif == MAXEXPD) {
2343 if (MANTD(temp) == 0x8000000000000000ULL) {
2344 env->fpus |= 0x500; /* Infinity */
2345 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2346 env->fpus |= 0x100; /* NaN */
2347 }
2348 } else if (expdif == 0) {
2349 if (MANTD(temp) == 0) {
2350 env->fpus |= 0x4000; /* Zero */
2351 } else {
2352 env->fpus |= 0x4400; /* Denormal */
2353 }
2354 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2355 env->fpus |= 0x400;
2356 }
2357 }
2358
2359 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2360 uintptr_t retaddr)
2361 {
2362 int fpus, fptag, exp, i;
2363 uint64_t mant;
2364 CPU_LDoubleU tmp;
2365
2366 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2367 fptag = 0;
2368 for (i = 7; i >= 0; i--) {
2369 fptag <<= 2;
2370 if (env->fptags[i]) {
2371 fptag |= 3;
2372 } else {
2373 tmp.d = env->fpregs[i].d;
2374 exp = EXPD(tmp);
2375 mant = MANTD(tmp);
2376 if (exp == 0 && mant == 0) {
2377 /* zero */
2378 fptag |= 1;
2379 } else if (exp == 0 || exp == MAXEXPD
2380 || (mant & (1LL << 63)) == 0) {
2381 /* NaNs, infinity, denormal */
2382 fptag |= 2;
2383 }
2384 }
2385 }
2386 if (data32) {
2387 /* 32 bit */
2388 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2389 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2390 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2391 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
2392 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
2393 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
2394 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
2395 } else {
2396 /* 16 bit */
2397 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2398 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2399 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2400 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
2401 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
2402 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
2403 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
2404 }
2405 }
2406
2407 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2408 {
2409 do_fstenv(env, ptr, data32, GETPC());
2410 }
2411
2412 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2413 {
2414 env->fpstt = (fpus >> 11) & 7;
2415 env->fpus = fpus & ~0x3800 & ~FPUS_B;
2416 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2417 #if !defined(CONFIG_USER_ONLY)
2418 if (!(env->fpus & FPUS_SE)) {
2419 /*
2420 * Here the processor deasserts FERR#; in response, the chipset deasserts
2421 * IGNNE#.
2422 */
2423 cpu_clear_ignne();
2424 }
2425 #endif
2426 }
2427
2428 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2429 uintptr_t retaddr)
2430 {
2431 int i, fpus, fptag;
2432
2433 if (data32) {
2434 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2435 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2436 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2437 } else {
2438 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2439 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2440 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2441 }
2442 cpu_set_fpus(env, fpus);
2443 for (i = 0; i < 8; i++) {
2444 env->fptags[i] = ((fptag & 3) == 3);
2445 fptag >>= 2;
2446 }
2447 }
2448
2449 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2450 {
2451 do_fldenv(env, ptr, data32, GETPC());
2452 }
2453
2454 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2455 {
2456 floatx80 tmp;
2457 int i;
2458
2459 do_fstenv(env, ptr, data32, GETPC());
2460
2461 ptr += (14 << data32);
2462 for (i = 0; i < 8; i++) {
2463 tmp = ST(i);
2464 helper_fstt(env, tmp, ptr, GETPC());
2465 ptr += 10;
2466 }
2467
2468 /* fninit */
2469 env->fpus = 0;
2470 env->fpstt = 0;
2471 cpu_set_fpuc(env, 0x37f);
2472 env->fptags[0] = 1;
2473 env->fptags[1] = 1;
2474 env->fptags[2] = 1;
2475 env->fptags[3] = 1;
2476 env->fptags[4] = 1;
2477 env->fptags[5] = 1;
2478 env->fptags[6] = 1;
2479 env->fptags[7] = 1;
2480 }
2481
2482 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2483 {
2484 floatx80 tmp;
2485 int i;
2486
2487 do_fldenv(env, ptr, data32, GETPC());
2488 ptr += (14 << data32);
2489
2490 for (i = 0; i < 8; i++) {
2491 tmp = helper_fldt(env, ptr, GETPC());
2492 ST(i) = tmp;
2493 ptr += 10;
2494 }
2495 }
2496
#if defined(CONFIG_USER_ONLY)
/* User-mode wrapper: forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode wrapper: forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif /* CONFIG_USER_ONLY */
2508
2509 #define XO(X) offsetof(X86XSaveArea, X)
2510
2511 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2512 {
2513 int fpus, fptag, i;
2514 target_ulong addr;
2515
2516 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2517 fptag = 0;
2518 for (i = 0; i < 8; i++) {
2519 fptag |= (env->fptags[i] << i);
2520 }
2521
2522 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2523 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2524 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2525
2526 /* In 32-bit mode this is eip, sel, dp, sel.
2527 In 64-bit mode this is rip, rdp.
2528 But in either case we don't write actual data, just zeros. */
2529 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2530 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2531
2532 addr = ptr + XO(legacy.fpregs);
2533 for (i = 0; i < 8; i++) {
2534 floatx80 tmp = ST(i);
2535 helper_fstt(env, tmp, addr, ra);
2536 addr += 16;
2537 }
2538 }
2539
2540 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2541 {
2542 update_mxcsr_from_sse_status(env);
2543 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2544 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2545 }
2546
2547 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2548 {
2549 int i, nb_xmm_regs;
2550 target_ulong addr;
2551
2552 if (env->hflags & HF_CS64_MASK) {
2553 nb_xmm_regs = 16;
2554 } else {
2555 nb_xmm_regs = 8;
2556 }
2557
2558 addr = ptr + XO(legacy.xmm_regs);
2559 for (i = 0; i < nb_xmm_regs; i++) {
2560 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2561 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2562 addr += 16;
2563 }
2564 }
2565
2566 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2567 {
2568 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2569 int i;
2570
2571 for (i = 0; i < 4; i++, addr += 16) {
2572 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2573 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2574 }
2575 }
2576
2577 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2578 {
2579 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2580 env->bndcs_regs.cfgu, ra);
2581 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2582 env->bndcs_regs.sts, ra);
2583 }
2584
2585 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2586 {
2587 cpu_stq_data_ra(env, ptr, env->pkru, ra);
2588 }
2589
2590 void helper_fxsave(CPUX86State *env, target_ulong ptr)
2591 {
2592 uintptr_t ra = GETPC();
2593
2594 /* The operand must be 16 byte aligned */
2595 if (ptr & 0xf) {
2596 raise_exception_ra(env, EXCP0D_GPF, ra);
2597 }
2598
2599 do_xsave_fpu(env, ptr, ra);
2600
2601 if (env->cr[4] & CR4_OSFXSR_MASK) {
2602 do_xsave_mxcsr(env, ptr, ra);
2603 /* Fast FXSAVE leaves out the XMM registers */
2604 if (!(env->efer & MSR_EFER_FFXSR)
2605 || (env->hflags & HF_CPL_MASK)
2606 || !(env->hflags & HF_LMA_MASK)) {
2607 do_xsave_sse(env, ptr, ra);
2608 }
2609 }
2610 }
2611
2612 static uint64_t get_xinuse(CPUX86State *env)
2613 {
2614 uint64_t inuse = -1;
2615
2616 /* For the most part, we don't track XINUSE. We could calculate it
2617 here for all components, but it's probably less work to simply
2618 indicate in use. That said, the state of BNDREGS is important
2619 enough to track in HFLAGS, so we might as well use that here. */
2620 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2621 inuse &= ~XSTATE_BNDREGS_MASK;
2622 }
2623 return inuse;
2624 }
2625
2626 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2627 uint64_t inuse, uint64_t opt, uintptr_t ra)
2628 {
2629 uint64_t old_bv, new_bv;
2630
2631 /* The OS must have enabled XSAVE. */
2632 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2633 raise_exception_ra(env, EXCP06_ILLOP, ra);
2634 }
2635
2636 /* The operand must be 64 byte aligned. */
2637 if (ptr & 63) {
2638 raise_exception_ra(env, EXCP0D_GPF, ra);
2639 }
2640
2641 /* Never save anything not enabled by XCR0. */
2642 rfbm &= env->xcr0;
2643 opt &= rfbm;
2644
2645 if (opt & XSTATE_FP_MASK) {
2646 do_xsave_fpu(env, ptr, ra);
2647 }
2648 if (rfbm & XSTATE_SSE_MASK) {
2649 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2650 do_xsave_mxcsr(env, ptr, ra);
2651 }
2652 if (opt & XSTATE_SSE_MASK) {
2653 do_xsave_sse(env, ptr, ra);
2654 }
2655 if (opt & XSTATE_BNDREGS_MASK) {
2656 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2657 }
2658 if (opt & XSTATE_BNDCSR_MASK) {
2659 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2660 }
2661 if (opt & XSTATE_PKRU_MASK) {
2662 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2663 }
2664
2665 /* Update the XSTATE_BV field. */
2666 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2667 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2668 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2669 }
2670
2671 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2672 {
2673 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2674 }
2675
2676 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2677 {
2678 uint64_t inuse = get_xinuse(env);
2679 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2680 }
2681
2682 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2683 {
2684 int i, fpuc, fpus, fptag;
2685 target_ulong addr;
2686
2687 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2688 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2689 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2690 cpu_set_fpuc(env, fpuc);
2691 cpu_set_fpus(env, fpus);
2692 fptag ^= 0xff;
2693 for (i = 0; i < 8; i++) {
2694 env->fptags[i] = ((fptag >> i) & 1);
2695 }
2696
2697 addr = ptr + XO(legacy.fpregs);
2698 for (i = 0; i < 8; i++) {
2699 floatx80 tmp = helper_fldt(env, addr, ra);
2700 ST(i) = tmp;
2701 addr += 16;
2702 }
2703 }
2704
2705 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2706 {
2707 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2708 }
2709
2710 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2711 {
2712 int i, nb_xmm_regs;
2713 target_ulong addr;
2714
2715 if (env->hflags & HF_CS64_MASK) {
2716 nb_xmm_regs = 16;
2717 } else {
2718 nb_xmm_regs = 8;
2719 }
2720
2721 addr = ptr + XO(legacy.xmm_regs);
2722 for (i = 0; i < nb_xmm_regs; i++) {
2723 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2724 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2725 addr += 16;
2726 }
2727 }
2728
2729 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2730 {
2731 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2732 int i;
2733
2734 for (i = 0; i < 4; i++, addr += 16) {
2735 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2736 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2737 }
2738 }
2739
2740 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2741 {
2742 /* FIXME: Extend highest implemented bit of linear address. */
2743 env->bndcs_regs.cfgu
2744 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2745 env->bndcs_regs.sts
2746 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2747 }
2748
2749 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2750 {
2751 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2752 }
2753
2754 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2755 {
2756 uintptr_t ra = GETPC();
2757
2758 /* The operand must be 16 byte aligned */
2759 if (ptr & 0xf) {
2760 raise_exception_ra(env, EXCP0D_GPF, ra);
2761 }
2762
2763 do_xrstor_fpu(env, ptr, ra);
2764
2765 if (env->cr[4] & CR4_OSFXSR_MASK) {
2766 do_xrstor_mxcsr(env, ptr, ra);
2767 /* Fast FXRSTOR leaves out the XMM registers */
2768 if (!(env->efer & MSR_EFER_FFXSR)
2769 || (env->hflags & HF_CPL_MASK)
2770 || !(env->hflags & HF_LMA_MASK)) {
2771 do_xrstor_sse(env, ptr, ra);
2772 }
2773 }
2774 }
2775
#if defined(CONFIG_USER_ONLY)
/* User-mode wrapper: forwards to helper_fxsave(). */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* User-mode wrapper: forwards to helper_fxrstor(). */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif /* CONFIG_USER_ONLY */
2787
2788 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2789 {
2790 uintptr_t ra = GETPC();
2791 uint64_t xstate_bv, xcomp_bv, reserve0;
2792
2793 rfbm &= env->xcr0;
2794
2795 /* The OS must have enabled XSAVE. */
2796 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2797 raise_exception_ra(env, EXCP06_ILLOP, ra);
2798 }
2799
2800 /* The operand must be 64 byte aligned. */
2801 if (ptr & 63) {
2802 raise_exception_ra(env, EXCP0D_GPF, ra);
2803 }
2804
2805 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2806
2807 if ((int64_t)xstate_bv < 0) {
2808 /* FIXME: Compact form. */
2809 raise_exception_ra(env, EXCP0D_GPF, ra);
2810 }
2811
2812 /* Standard form. */
2813
2814 /* The XSTATE_BV field must not set bits not present in XCR0. */
2815 if (xstate_bv & ~env->xcr0) {
2816 raise_exception_ra(env, EXCP0D_GPF, ra);
2817 }
2818
2819 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2820 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2821 describes only XCOMP_BV, but the description of the standard form
2822 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2823 includes the next 64-bit field. */
2824 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2825 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2826 if (xcomp_bv || reserve0) {
2827 raise_exception_ra(env, EXCP0D_GPF, ra);
2828 }
2829
2830 if (rfbm & XSTATE_FP_MASK) {
2831 if (xstate_bv & XSTATE_FP_MASK) {
2832 do_xrstor_fpu(env, ptr, ra);
2833 } else {
2834 helper_fninit(env);
2835 memset(env->fpregs, 0, sizeof(env->fpregs));
2836 }
2837 }
2838 if (rfbm & XSTATE_SSE_MASK) {
2839 /* Note that the standard form of XRSTOR loads MXCSR from memory
2840 whether or not the XSTATE_BV bit is set. */
2841 do_xrstor_mxcsr(env, ptr, ra);
2842 if (xstate_bv & XSTATE_SSE_MASK) {
2843 do_xrstor_sse(env, ptr, ra);
2844 } else {
2845 /* ??? When AVX is implemented, we may have to be more
2846 selective in the clearing. */
2847 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2848 }
2849 }
2850 if (rfbm & XSTATE_BNDREGS_MASK) {
2851 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2852 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2853 env->hflags |= HF_MPX_IU_MASK;
2854 } else {
2855 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2856 env->hflags &= ~HF_MPX_IU_MASK;
2857 }
2858 }
2859 if (rfbm & XSTATE_BNDCSR_MASK) {
2860 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2861 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2862 } else {
2863 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2864 }
2865 cpu_sync_bndcs_hflags(env);
2866 }
2867 if (rfbm & XSTATE_PKRU_MASK) {
2868 uint64_t old_pkru = env->pkru;
2869 if (xstate_bv & XSTATE_PKRU_MASK) {
2870 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2871 } else {
2872 env->pkru = 0;
2873 }
2874 if (env->pkru != old_pkru) {
2875 CPUState *cs = env_cpu(env);
2876 tlb_flush(cs);
2877 }
2878 }
2879 }
2880
2881 #undef XO
2882
2883 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2884 {
2885 /* The OS must have enabled XSAVE. */
2886 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2887 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2888 }
2889
2890 switch (ecx) {
2891 case 0:
2892 return env->xcr0;
2893 case 1:
2894 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2895 return env->xcr0 & get_xinuse(env);
2896 }
2897 break;
2898 }
2899 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2900 }
2901
2902 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2903 {
2904 uint32_t dummy, ena_lo, ena_hi;
2905 uint64_t ena;
2906
2907 /* The OS must have enabled XSAVE. */
2908 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2909 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2910 }
2911
2912 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2913 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2914 goto do_gpf;
2915 }
2916
2917 /* Disallow enabling unimplemented features. */
2918 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2919 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2920 if (mask & ~ena) {
2921 goto do_gpf;
2922 }
2923
2924 /* Disallow enabling only half of MPX. */
2925 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2926 & XSTATE_BNDCSR_MASK) {
2927 goto do_gpf;
2928 }
2929
2930 env->xcr0 = mask;
2931 cpu_sync_bndcs_hflags(env);
2932 return;
2933
2934 do_gpf:
2935 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2936 }
2937
2938 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2940
/* MXCSR control bits decoded by update_mxcsr_status(). */
#define SSE_DAZ 0x0040 /* denormals are zero: flush inputs to zero */
#define SSE_RC_MASK 0x6000 /* rounding-control field */
#define SSE_RC_NEAR 0x0000 /* round to nearest even */
#define SSE_RC_DOWN 0x2000 /* round down */
#define SSE_RC_UP 0x4000 /* round up */
#define SSE_RC_CHOP 0x6000 /* round toward zero */
#define SSE_FZ 0x8000 /* flush results to zero */
2948
2949 void update_mxcsr_status(CPUX86State *env)
2950 {
2951 uint32_t mxcsr = env->mxcsr;
2952 int rnd_type;
2953
2954 /* set rounding mode */
2955 switch (mxcsr & SSE_RC_MASK) {
2956 default:
2957 case SSE_RC_NEAR:
2958 rnd_type = float_round_nearest_even;
2959 break;
2960 case SSE_RC_DOWN:
2961 rnd_type = float_round_down;
2962 break;
2963 case SSE_RC_UP:
2964 rnd_type = float_round_up;
2965 break;
2966 case SSE_RC_CHOP:
2967 rnd_type = float_round_to_zero;
2968 break;
2969 }
2970 set_float_rounding_mode(rnd_type, &env->sse_status);
2971
2972 /* Set exception flags. */
2973 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
2974 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
2975 (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
2976 (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
2977 (mxcsr & FPUS_PE ? float_flag_inexact : 0),
2978 &env->sse_status);
2979
2980 /* set denormals are zero */
2981 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2982
2983 /* set flush to zero */
2984 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2985 }
2986
2987 void update_mxcsr_from_sse_status(CPUX86State *env)
2988 {
2989 if (tcg_enabled()) {
2990 uint8_t flags = get_float_exception_flags(&env->sse_status);
2991 /*
2992 * The MXCSR denormal flag has opposite semantics to
2993 * float_flag_input_denormal (the softfloat code sets that flag
2994 * only when flushing input denormals to zero, but SSE sets it
2995 * only when not flushing them to zero), so is not converted
2996 * here.
2997 */
2998 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
2999 (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3000 (flags & float_flag_overflow ? FPUS_OE : 0) |
3001 (flags & float_flag_underflow ? FPUS_UE : 0) |
3002 (flags & float_flag_inexact ? FPUS_PE : 0) |
3003 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3004 0));
3005 }
3006 }
3007
/*
 * Helper entry point that merges the pending softfloat SSE exception
 * flags into env->mxcsr by calling update_mxcsr_from_sse_status().
 */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
3012
/*
 * LDMXCSR helper: hand the new MXCSR value @val to cpu_set_mxcsr().
 * NOTE(review): cpu_set_mxcsr() is defined elsewhere; presumably it also
 * refreshes the softfloat state - confirm.
 */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
3017
3018 void helper_enter_mmx(CPUX86State *env)
3019 {
3020 env->fpstt = 0;
3021 *(uint32_t *)(env->fptags) = 0;
3022 *(uint32_t *)(env->fptags + 4) = 0;
3023 }
3024
3025 void helper_emms(CPUX86State *env)
3026 {
3027 /* set to empty state */
3028 *(uint32_t *)(env->fptags) = 0x01010101;
3029 *(uint32_t *)(env->fptags + 4) = 0x01010101;
3030 }
3031
3032 /* XXX: suppress */
3033 void helper_movq(CPUX86State *env, void *d, void *s)
3034 {
3035 *(uint64_t *)d = *(uint64_t *)s;
3036 }
3037
3038 #define SHIFT 0
3039 #include "ops_sse.h"
3040
3041 #define SHIFT 1
3042 #include "ops_sse.h"