target/i386: reimplement f2xm1 using floatx80 operations
[qemu.git] / target / i386 / fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29
30 #ifdef CONFIG_SOFTMMU
31 #include "hw/irq.h"
32 #endif
33
/*
 * Rounding-control (RC) field of the x87 control word (env->fpuc).
 * The field occupies bits 10-11; update_fp_status() maps each value to
 * the matching softfloat rounding mode.
 */
#define FPU_RC_MASK (3 << 10)
#define FPU_RC_NEAR (0 << 10) /* round to nearest even */
#define FPU_RC_DOWN (1 << 10) /* round down */
#define FPU_RC_UP   (2 << 10) /* round up */
#define FPU_RC_CHOP (3 << 10) /* round toward zero */
39
/* 2^63 as a double constant (identical to 9223372036854775808.0). */
#define MAXTAN 0x1p63
41
/*
 * Accessors for x86 long double-precision (80-bit) numbers.
 *
 * The "fp" argument is an object with an "l.upper" member holding the
 * sign bit and the 15-bit biased exponent, and an "l.lower" member
 * holding the 64-bit significand.
 *
 * Every macro argument is parenthesized so that any lvalue expression
 * (for example "*p") can be passed, and BIASEXPONENT() is a complete
 * parenthesized expression so it is safe inside larger expressions.
 */
#define MAXEXPD 0x7fff  /* largest biased exponent */
#define EXPBIAS 16383   /* exponent bias */
/* Biased exponent (low 15 bits of l.upper). */
#define EXPD(fp) ((fp).l.upper & 0x7fff)
/* Sign bit: non-zero (0x8000) when set. */
#define SIGND(fp) ((fp).l.upper & 0x8000)
/* 64-bit significand. */
#define MANTD(fp) ((fp).l.lower)
/* Replace the exponent field with EXPBIAS, keeping the other bits. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
49
/*
 * x87 status word (env->fpus) bits.  merge_exception_flags() maps the
 * softfloat flags invalid, input-denormal, divide-by-zero, overflow,
 * underflow and inexact onto IE, DE, ZE, OE, UE and PE respectively;
 * fpu_set_exception() sets SE and B when an unmasked exception is
 * pending.
 */
#define FPUS_IE 0x0001
#define FPUS_DE 0x0002
#define FPUS_ZE 0x0004
#define FPUS_OE 0x0008
#define FPUS_UE 0x0010
#define FPUS_PE 0x0020
#define FPUS_SF 0x0040
#define FPUS_SE 0x0080
#define FPUS_B  0x8000

/* Exception-mask bits of the control word (same positions as IE..PE). */
#define FPUC_EM 0x3f
61
/*
 * Constants pushed by the FLDxx helpers, grouped by the helper that
 * uses them.  A "_d" suffix marks the variant selected for the
 * round-down and chop rounding modes, "_u" the one selected for
 * round-up; they differ from the default by one unit in the last place
 * of the significand.
 */
/* helper_fldl2t_ST0 */
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
/* helper_fldl2e_ST0 */
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
/* helper_fldpi_ST0 (default value floatx80_pi is defined elsewhere) */
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
/* helper_fldlg2_ST0 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
/* helper_fldln2_ST0 (default value floatx80_ln2 is defined elsewhere) */
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
70
71 #if !defined(CONFIG_USER_ONLY)
72 static qemu_irq ferr_irq;
73
/*
 * Remember the interrupt line used to report x87 errors.  It is raised
 * by fpu_raise_exception() and lowered by cpu_set_ignne().
 */
void x86_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}
78
79 static void cpu_clear_ignne(void)
80 {
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 &= ~HF2_IGNNE_MASK;
83 }
84
85 void cpu_set_ignne(void)
86 {
87 CPUX86State *env = &X86_CPU(first_cpu)->env;
88 env->hflags2 |= HF2_IGNNE_MASK;
89 /*
90 * We get here in response to a write to port F0h. The chipset should
91 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
92 * cleared, because FERR# and FP_IRQ are two separate pins on real
93 * hardware. However, we don't model FERR# as a qemu_irq, so we just
94 * do directly what the chipset would do, i.e. deassert FP_IRQ.
95 */
96 qemu_irq_lower(ferr_irq);
97 }
98 #endif
99
100
101 static inline void fpush(CPUX86State *env)
102 {
103 env->fpstt = (env->fpstt - 1) & 7;
104 env->fptags[env->fpstt] = 0; /* validate stack entry */
105 }
106
107 static inline void fpop(CPUX86State *env)
108 {
109 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
110 env->fpstt = (env->fpstt + 1) & 7;
111 }
112
113 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
114 uintptr_t retaddr)
115 {
116 CPU_LDoubleU temp;
117
118 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
119 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
120 return temp.d;
121 }
122
123 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
124 uintptr_t retaddr)
125 {
126 CPU_LDoubleU temp;
127
128 temp.d = f;
129 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
130 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
131 }
132
133 /* x87 FPU helpers */
134
135 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
136 {
137 union {
138 float64 f64;
139 double d;
140 } u;
141
142 u.f64 = floatx80_to_float64(a, &env->fp_status);
143 return u.d;
144 }
145
146 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
147 {
148 union {
149 float64 f64;
150 double d;
151 } u;
152
153 u.d = a;
154 return float64_to_floatx80(u.f64, &env->fp_status);
155 }
156
157 static void fpu_set_exception(CPUX86State *env, int mask)
158 {
159 env->fpus |= mask;
160 if (env->fpus & (~env->fpuc & FPUC_EM)) {
161 env->fpus |= FPUS_SE | FPUS_B;
162 }
163 }
164
165 static inline uint8_t save_exception_flags(CPUX86State *env)
166 {
167 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
168 set_float_exception_flags(0, &env->fp_status);
169 return old_flags;
170 }
171
172 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
173 {
174 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
175 float_raise(old_flags, &env->fp_status);
176 fpu_set_exception(env,
177 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
178 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
179 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
180 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
181 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
182 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
183 }
184
185 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
186 {
187 uint8_t old_flags = save_exception_flags(env);
188 floatx80 ret = floatx80_div(a, b, &env->fp_status);
189 merge_exception_flags(env, old_flags);
190 return ret;
191 }
192
193 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
194 {
195 if (env->cr[0] & CR0_NE_MASK) {
196 raise_exception_ra(env, EXCP10_COPR, retaddr);
197 }
198 #if !defined(CONFIG_USER_ONLY)
199 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
200 qemu_irq_raise(ferr_irq);
201 }
202 #endif
203 }
204
205 void helper_flds_FT0(CPUX86State *env, uint32_t val)
206 {
207 uint8_t old_flags = save_exception_flags(env);
208 union {
209 float32 f;
210 uint32_t i;
211 } u;
212
213 u.i = val;
214 FT0 = float32_to_floatx80(u.f, &env->fp_status);
215 merge_exception_flags(env, old_flags);
216 }
217
218 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
219 {
220 uint8_t old_flags = save_exception_flags(env);
221 union {
222 float64 f;
223 uint64_t i;
224 } u;
225
226 u.i = val;
227 FT0 = float64_to_floatx80(u.f, &env->fp_status);
228 merge_exception_flags(env, old_flags);
229 }
230
/*
 * Load FT0 with the 32-bit signed integer @val converted to floatx80.
 * Unlike the floating-point loads, no exception flags are saved or
 * merged around the conversion.
 */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
235
236 void helper_flds_ST0(CPUX86State *env, uint32_t val)
237 {
238 uint8_t old_flags = save_exception_flags(env);
239 int new_fpstt;
240 union {
241 float32 f;
242 uint32_t i;
243 } u;
244
245 new_fpstt = (env->fpstt - 1) & 7;
246 u.i = val;
247 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
248 env->fpstt = new_fpstt;
249 env->fptags[new_fpstt] = 0; /* validate stack entry */
250 merge_exception_flags(env, old_flags);
251 }
252
253 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
254 {
255 uint8_t old_flags = save_exception_flags(env);
256 int new_fpstt;
257 union {
258 float64 f;
259 uint64_t i;
260 } u;
261
262 new_fpstt = (env->fpstt - 1) & 7;
263 u.i = val;
264 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
265 env->fpstt = new_fpstt;
266 env->fptags[new_fpstt] = 0; /* validate stack entry */
267 merge_exception_flags(env, old_flags);
268 }
269
270 void helper_fildl_ST0(CPUX86State *env, int32_t val)
271 {
272 int new_fpstt;
273
274 new_fpstt = (env->fpstt - 1) & 7;
275 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
276 env->fpstt = new_fpstt;
277 env->fptags[new_fpstt] = 0; /* validate stack entry */
278 }
279
280 void helper_fildll_ST0(CPUX86State *env, int64_t val)
281 {
282 int new_fpstt;
283
284 new_fpstt = (env->fpstt - 1) & 7;
285 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
286 env->fpstt = new_fpstt;
287 env->fptags[new_fpstt] = 0; /* validate stack entry */
288 }
289
290 uint32_t helper_fsts_ST0(CPUX86State *env)
291 {
292 uint8_t old_flags = save_exception_flags(env);
293 union {
294 float32 f;
295 uint32_t i;
296 } u;
297
298 u.f = floatx80_to_float32(ST0, &env->fp_status);
299 merge_exception_flags(env, old_flags);
300 return u.i;
301 }
302
303 uint64_t helper_fstl_ST0(CPUX86State *env)
304 {
305 uint8_t old_flags = save_exception_flags(env);
306 union {
307 float64 f;
308 uint64_t i;
309 } u;
310
311 u.f = floatx80_to_float64(ST0, &env->fp_status);
312 merge_exception_flags(env, old_flags);
313 return u.i;
314 }
315
316 int32_t helper_fist_ST0(CPUX86State *env)
317 {
318 uint8_t old_flags = save_exception_flags(env);
319 int32_t val;
320
321 val = floatx80_to_int32(ST0, &env->fp_status);
322 if (val != (int16_t)val) {
323 set_float_exception_flags(float_flag_invalid, &env->fp_status);
324 val = -32768;
325 }
326 merge_exception_flags(env, old_flags);
327 return val;
328 }
329
330 int32_t helper_fistl_ST0(CPUX86State *env)
331 {
332 uint8_t old_flags = save_exception_flags(env);
333 int32_t val;
334
335 val = floatx80_to_int32(ST0, &env->fp_status);
336 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
337 val = 0x80000000;
338 }
339 merge_exception_flags(env, old_flags);
340 return val;
341 }
342
343 int64_t helper_fistll_ST0(CPUX86State *env)
344 {
345 uint8_t old_flags = save_exception_flags(env);
346 int64_t val;
347
348 val = floatx80_to_int64(ST0, &env->fp_status);
349 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
350 val = 0x8000000000000000ULL;
351 }
352 merge_exception_flags(env, old_flags);
353 return val;
354 }
355
356 int32_t helper_fistt_ST0(CPUX86State *env)
357 {
358 uint8_t old_flags = save_exception_flags(env);
359 int32_t val;
360
361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
362 if (val != (int16_t)val) {
363 set_float_exception_flags(float_flag_invalid, &env->fp_status);
364 val = -32768;
365 }
366 merge_exception_flags(env, old_flags);
367 return val;
368 }
369
370 int32_t helper_fisttl_ST0(CPUX86State *env)
371 {
372 uint8_t old_flags = save_exception_flags(env);
373 int32_t val;
374
375 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
376 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
377 val = 0x80000000;
378 }
379 merge_exception_flags(env, old_flags);
380 return val;
381 }
382
383 int64_t helper_fisttll_ST0(CPUX86State *env)
384 {
385 uint8_t old_flags = save_exception_flags(env);
386 int64_t val;
387
388 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
389 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
390 val = 0x8000000000000000ULL;
391 }
392 merge_exception_flags(env, old_flags);
393 return val;
394 }
395
396 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
397 {
398 int new_fpstt;
399
400 new_fpstt = (env->fpstt - 1) & 7;
401 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
402 env->fpstt = new_fpstt;
403 env->fptags[new_fpstt] = 0; /* validate stack entry */
404 }
405
/* Store ST0 as an 80-bit value at guest address @ptr. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}
410
/* Helper entry point wrapping fpush(): push an entry on the x87 stack. */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
415
/* Helper entry point wrapping fpop(): pop the top of the x87 stack. */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
420
421 void helper_fdecstp(CPUX86State *env)
422 {
423 env->fpstt = (env->fpstt - 1) & 7;
424 env->fpus &= ~0x4700;
425 }
426
427 void helper_fincstp(CPUX86State *env)
428 {
429 env->fpstt = (env->fpstt + 1) & 7;
430 env->fpus &= ~0x4700;
431 }
432
433 /* FPU move */
434
435 void helper_ffree_STN(CPUX86State *env, int st_index)
436 {
437 env->fptags[(env->fpstt + st_index) & 7] = 1;
438 }
439
/* Copy FT0 into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
444
/* Copy stack register ST(@st_index) into FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
449
/* Copy stack register ST(@st_index) into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
454
/* Copy ST0 into stack register ST(@st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
459
460 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
461 {
462 floatx80 tmp;
463
464 tmp = ST(st_index);
465 ST(st_index) = ST0;
466 ST0 = tmp;
467 }
468
469 /* FPU operations */
470
/*
 * Status-word bits (within mask 0x4500) set by the FCOM-family
 * helpers, indexed by the FloatRelation comparison result plus one.
 * NOTE(review): the index order is presumably less, equal, greater,
 * unordered - confirm against the FloatRelation definition.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
472
473 void helper_fcom_ST0_FT0(CPUX86State *env)
474 {
475 uint8_t old_flags = save_exception_flags(env);
476 FloatRelation ret;
477
478 ret = floatx80_compare(ST0, FT0, &env->fp_status);
479 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
480 merge_exception_flags(env, old_flags);
481 }
482
483 void helper_fucom_ST0_FT0(CPUX86State *env)
484 {
485 uint8_t old_flags = save_exception_flags(env);
486 FloatRelation ret;
487
488 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
489 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
490 merge_exception_flags(env, old_flags);
491 }
492
493 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
494
495 void helper_fcomi_ST0_FT0(CPUX86State *env)
496 {
497 uint8_t old_flags = save_exception_flags(env);
498 int eflags;
499 FloatRelation ret;
500
501 ret = floatx80_compare(ST0, FT0, &env->fp_status);
502 eflags = cpu_cc_compute_all(env, CC_OP);
503 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
504 CC_SRC = eflags;
505 merge_exception_flags(env, old_flags);
506 }
507
508 void helper_fucomi_ST0_FT0(CPUX86State *env)
509 {
510 uint8_t old_flags = save_exception_flags(env);
511 int eflags;
512 FloatRelation ret;
513
514 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
515 eflags = cpu_cc_compute_all(env, CC_OP);
516 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
517 CC_SRC = eflags;
518 merge_exception_flags(env, old_flags);
519 }
520
521 void helper_fadd_ST0_FT0(CPUX86State *env)
522 {
523 uint8_t old_flags = save_exception_flags(env);
524 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
525 merge_exception_flags(env, old_flags);
526 }
527
528 void helper_fmul_ST0_FT0(CPUX86State *env)
529 {
530 uint8_t old_flags = save_exception_flags(env);
531 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
532 merge_exception_flags(env, old_flags);
533 }
534
535 void helper_fsub_ST0_FT0(CPUX86State *env)
536 {
537 uint8_t old_flags = save_exception_flags(env);
538 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
539 merge_exception_flags(env, old_flags);
540 }
541
542 void helper_fsubr_ST0_FT0(CPUX86State *env)
543 {
544 uint8_t old_flags = save_exception_flags(env);
545 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
546 merge_exception_flags(env, old_flags);
547 }
548
/* ST0 = ST0 / FT0; exception reporting is done inside helper_fdiv(). */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
553
/*
 * Reverse divide: ST0 = FT0 / ST0; exception reporting is done inside
 * helper_fdiv().
 */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
558
559 /* fp operations between STN and ST0 */
560
561 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
562 {
563 uint8_t old_flags = save_exception_flags(env);
564 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
565 merge_exception_flags(env, old_flags);
566 }
567
568 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
569 {
570 uint8_t old_flags = save_exception_flags(env);
571 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
572 merge_exception_flags(env, old_flags);
573 }
574
575 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
576 {
577 uint8_t old_flags = save_exception_flags(env);
578 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
579 merge_exception_flags(env, old_flags);
580 }
581
582 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
583 {
584 uint8_t old_flags = save_exception_flags(env);
585 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
586 merge_exception_flags(env, old_flags);
587 }
588
589 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
590 {
591 floatx80 *p;
592
593 p = &ST(st_index);
594 *p = helper_fdiv(env, *p, ST0);
595 }
596
597 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
598 {
599 floatx80 *p;
600
601 p = &ST(st_index);
602 *p = helper_fdiv(env, ST0, *p);
603 }
604
605 /* misc FPU operations */
/* Replace ST0 with floatx80_chs(ST0); no exception flags are touched. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
610
/* Replace ST0 with floatx80_abs(ST0); no exception flags are touched. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
615
/* Set ST0 to the constant floatx80_one (the caller handles the push). */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
620
621 void helper_fldl2t_ST0(CPUX86State *env)
622 {
623 switch (env->fpuc & FPU_RC_MASK) {
624 case FPU_RC_UP:
625 ST0 = floatx80_l2t_u;
626 break;
627 default:
628 ST0 = floatx80_l2t;
629 break;
630 }
631 }
632
633 void helper_fldl2e_ST0(CPUX86State *env)
634 {
635 switch (env->fpuc & FPU_RC_MASK) {
636 case FPU_RC_DOWN:
637 case FPU_RC_CHOP:
638 ST0 = floatx80_l2e_d;
639 break;
640 default:
641 ST0 = floatx80_l2e;
642 break;
643 }
644 }
645
646 void helper_fldpi_ST0(CPUX86State *env)
647 {
648 switch (env->fpuc & FPU_RC_MASK) {
649 case FPU_RC_DOWN:
650 case FPU_RC_CHOP:
651 ST0 = floatx80_pi_d;
652 break;
653 default:
654 ST0 = floatx80_pi;
655 break;
656 }
657 }
658
659 void helper_fldlg2_ST0(CPUX86State *env)
660 {
661 switch (env->fpuc & FPU_RC_MASK) {
662 case FPU_RC_DOWN:
663 case FPU_RC_CHOP:
664 ST0 = floatx80_lg2_d;
665 break;
666 default:
667 ST0 = floatx80_lg2;
668 break;
669 }
670 }
671
672 void helper_fldln2_ST0(CPUX86State *env)
673 {
674 switch (env->fpuc & FPU_RC_MASK) {
675 case FPU_RC_DOWN:
676 case FPU_RC_CHOP:
677 ST0 = floatx80_ln2_d;
678 break;
679 default:
680 ST0 = floatx80_ln2;
681 break;
682 }
683 }
684
/* Set ST0 to the constant floatx80_zero. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
689
/* Set FT0 to the constant floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
694
695 uint32_t helper_fnstsw(CPUX86State *env)
696 {
697 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
698 }
699
/* Return the x87 control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
704
705 void update_fp_status(CPUX86State *env)
706 {
707 int rnd_type;
708
709 /* set rounding mode */
710 switch (env->fpuc & FPU_RC_MASK) {
711 default:
712 case FPU_RC_NEAR:
713 rnd_type = float_round_nearest_even;
714 break;
715 case FPU_RC_DOWN:
716 rnd_type = float_round_down;
717 break;
718 case FPU_RC_UP:
719 rnd_type = float_round_up;
720 break;
721 case FPU_RC_CHOP:
722 rnd_type = float_round_to_zero;
723 break;
724 }
725 set_float_rounding_mode(rnd_type, &env->fp_status);
726 switch ((env->fpuc >> 8) & 3) {
727 case 0:
728 rnd_type = 32;
729 break;
730 case 2:
731 rnd_type = 64;
732 break;
733 case 3:
734 default:
735 rnd_type = 80;
736 break;
737 }
738 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
739 }
740
/* Load the x87 control word from @val via cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
745
/*
 * Clear status-word bits 0-7 (the exception flags IE..PE plus SF and
 * SE) and bit 15 (B), keeping bits 8-14.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}
750
751 void helper_fwait(CPUX86State *env)
752 {
753 if (env->fpus & FPUS_SE) {
754 fpu_raise_exception(env, GETPC());
755 }
756 }
757
758 void helper_fninit(CPUX86State *env)
759 {
760 env->fpus = 0;
761 env->fpstt = 0;
762 cpu_set_fpuc(env, 0x37f);
763 env->fptags[0] = 1;
764 env->fptags[1] = 1;
765 env->fptags[2] = 1;
766 env->fptags[3] = 1;
767 env->fptags[4] = 1;
768 env->fptags[5] = 1;
769 env->fptags[6] = 1;
770 env->fptags[7] = 1;
771 }
772
773 /* BCD ops */
774
775 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
776 {
777 floatx80 tmp;
778 uint64_t val;
779 unsigned int v;
780 int i;
781
782 val = 0;
783 for (i = 8; i >= 0; i--) {
784 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
785 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
786 }
787 tmp = int64_to_floatx80(val, &env->fp_status);
788 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
789 tmp = floatx80_chs(tmp);
790 }
791 fpush(env);
792 ST0 = tmp;
793 }
794
795 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
796 {
797 uint8_t old_flags = save_exception_flags(env);
798 int v;
799 target_ulong mem_ref, mem_end;
800 int64_t val;
801 CPU_LDoubleU temp;
802
803 temp.d = ST0;
804
805 val = floatx80_to_int64(ST0, &env->fp_status);
806 mem_ref = ptr;
807 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
808 set_float_exception_flags(float_flag_invalid, &env->fp_status);
809 while (mem_ref < ptr + 7) {
810 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
811 }
812 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
813 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
814 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
815 merge_exception_flags(env, old_flags);
816 return;
817 }
818 mem_end = mem_ref + 9;
819 if (SIGND(temp)) {
820 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
821 val = -val;
822 } else {
823 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
824 }
825 while (mem_ref < mem_end) {
826 if (val == 0) {
827 break;
828 }
829 v = val % 100;
830 val = val / 100;
831 v = ((v / 10) << 4) | (v % 10);
832 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
833 }
834 while (mem_ref < mem_end) {
835 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
836 }
837 merge_exception_flags(env, old_flags);
838 }
839
/*
 * 128-bit significand of log(2), split into its high and low 64-bit
 * halves.  The high half equals the significand of floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
843
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_N is the coefficient of x^N.  The leading coefficient is
 * given as two floatx80 values: f2xm1_coeff_0 and a much smaller
 * (negative) correction term f2xm1_coeff_0_low.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
856 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
857
/* One entry of the lookup table used by the f2xm1 implementation. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
869
/*
 * Lookup table for f2xm1: 65 entries { t, 2^t, 2^t - 1 } with t close
 * to k/32 for k = -32 .. 32, in increasing order.  Entry 0 is t = -1,
 * entry 32 is t = 0 and entry 64 is t = 1; those three are exact.
 *
 * NOTE(review): if make_floatx80() expands to a compound literal, its
 * use in this static initializer relies on a compiler extension
 * (compound literals are not constant expressions in ISO C) - confirm
 * against the softfloat headers and the oldest supported compiler.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80(0xbfff, 0x8000000000000000ULL),
      make_floatx80(0x3ffe, 0x8000000000000000ULL),
      make_floatx80(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80(0xbffe, 0xf800000000002e7eULL),
      make_floatx80(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80(0xbffe, 0xe800000000006f10ULL),
      make_floatx80(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80(0xbffe, 0xe000000000008a45ULL),
      make_floatx80(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80(0xbffe, 0xc000000000006530ULL),
      make_floatx80(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80(0xbffe, 0xa800000000006f8aULL),
      make_floatx80(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80(0xbffe, 0x800000000000227dULL),
      make_floatx80(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80(0xbffd, 0xe00000000000df81ULL),
      make_floatx80(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80(0xbffd, 0xd00000000000bccfULL),
      make_floatx80(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80(0xbff9, 0xffffffffffff11feULL),
      make_floatx80(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero,
      make_floatx80(0x3fff, 0x8000000000000000ULL),
      floatx80_zero },
    { make_floatx80(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80(0x3ffb, 0x800000000000b500ULL),
      make_floatx80(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80(0x3fff, 0xa27043030c49370aULL),
      make_floatx80(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80(0x3ffd, 0xd0000000000093beULL),
      make_floatx80(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80(0x3ffe, 0x8800000000006344ULL),
      make_floatx80(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80(0x3ffe, 0x9800000000009127ULL),
      make_floatx80(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80(0x3ffe, 0xd800000000004165ULL),
      make_floatx80(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80(0x3ffe, 0xe00000000000582cULL),
      make_floatx80(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80(0x3ffe, 0xf800000000001069ULL),
      make_floatx80(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80(0x3fff, 0x8000000000000000ULL),
      make_floatx80(0x4000, 0x8000000000000000ULL),
      make_floatx80(0x3fff, 0x8000000000000000ULL) },
};
1067
1068 void helper_f2xm1(CPUX86State *env)
1069 {
1070 uint8_t old_flags = save_exception_flags(env);
1071 uint64_t sig = extractFloatx80Frac(ST0);
1072 int32_t exp = extractFloatx80Exp(ST0);
1073 bool sign = extractFloatx80Sign(ST0);
1074
1075 if (floatx80_invalid_encoding(ST0)) {
1076 float_raise(float_flag_invalid, &env->fp_status);
1077 ST0 = floatx80_default_nan(&env->fp_status);
1078 } else if (floatx80_is_any_nan(ST0)) {
1079 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1080 float_raise(float_flag_invalid, &env->fp_status);
1081 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1082 }
1083 } else if (exp > 0x3fff ||
1084 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1085 /* Out of range for the instruction, treat as invalid. */
1086 float_raise(float_flag_invalid, &env->fp_status);
1087 ST0 = floatx80_default_nan(&env->fp_status);
1088 } else if (exp == 0x3fff) {
1089 /* Argument 1 or -1, exact result 1 or -0.5. */
1090 if (sign) {
1091 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1092 }
1093 } else if (exp < 0x3fb0) {
1094 if (!floatx80_is_zero(ST0)) {
1095 /*
1096 * Multiplying the argument by an extra-precision version
1097 * of log(2) is sufficiently precise. Zero arguments are
1098 * returned unchanged.
1099 */
1100 uint64_t sig0, sig1, sig2;
1101 if (exp == 0) {
1102 normalizeFloatx80Subnormal(sig, &exp, &sig);
1103 }
1104 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1105 &sig2);
1106 /* This result is inexact. */
1107 sig1 |= 1;
1108 ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
1109 &env->fp_status);
1110 }
1111 } else {
1112 floatx80 tmp, y, accum;
1113 bool asign, bsign;
1114 int32_t n, aexp, bexp;
1115 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1116 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1117 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1118 env->fp_status.float_rounding_mode = float_round_nearest_even;
1119 env->fp_status.floatx80_rounding_precision = 80;
1120
1121 /* Find the nearest multiple of 1/32 to the argument. */
1122 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1123 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1124 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1125
1126 if (floatx80_is_zero(y)) {
1127 /*
1128 * Use the value of 2^t - 1 from the table, to avoid
1129 * needing to special-case zero as a result of
1130 * multiplication below.
1131 */
1132 ST0 = f2xm1_table[n].t;
1133 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1134 env->fp_status.float_rounding_mode = save_mode;
1135 } else {
1136 /*
1137 * Compute the lower parts of a polynomial expansion for
1138 * (2^y - 1) / y.
1139 */
1140 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1141 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1142 accum = floatx80_mul(accum, y, &env->fp_status);
1143 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1144 accum = floatx80_mul(accum, y, &env->fp_status);
1145 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1146 accum = floatx80_mul(accum, y, &env->fp_status);
1147 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1148 accum = floatx80_mul(accum, y, &env->fp_status);
1149 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1150 accum = floatx80_mul(accum, y, &env->fp_status);
1151 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1152 accum = floatx80_mul(accum, y, &env->fp_status);
1153 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1154
1155 /*
1156 * The full polynomial expansion is f2xm1_coeff_0 + accum
1157 * (where accum has much lower magnitude, and so, in
1158 * particular, carry out of the addition is not possible).
1159 * (This expansion is only accurate to about 70 bits, not
1160 * 128 bits.)
1161 */
1162 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1163 asign = extractFloatx80Sign(f2xm1_coeff_0);
1164 shift128RightJamming(extractFloatx80Frac(accum), 0,
1165 aexp - extractFloatx80Exp(accum),
1166 &asig0, &asig1);
1167 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1168 bsig1 = 0;
1169 if (asign == extractFloatx80Sign(accum)) {
1170 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1171 } else {
1172 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1173 }
1174 /* And thus compute an approximation to 2^y - 1. */
1175 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1176 &asig0, &asig1, &asig2);
1177 aexp += extractFloatx80Exp(y) - 0x3ffe;
1178 asign ^= extractFloatx80Sign(y);
1179 if (n != 32) {
1180 /*
1181 * Multiply this by the precomputed value of 2^t and
1182 * add that of 2^t - 1.
1183 */
1184 mul128By64To192(asig0, asig1,
1185 extractFloatx80Frac(f2xm1_table[n].exp2),
1186 &asig0, &asig1, &asig2);
1187 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1188 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1189 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1190 bsig1 = 0;
1191 if (bexp < aexp) {
1192 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1193 &bsig0, &bsig1);
1194 } else if (aexp < bexp) {
1195 shift128RightJamming(asig0, asig1, bexp - aexp,
1196 &asig0, &asig1);
1197 aexp = bexp;
1198 }
1199 /* The sign of 2^t - 1 is always that of the result. */
1200 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1201 if (asign == bsign) {
1202 /* Avoid possible carry out of the addition. */
1203 shift128RightJamming(asig0, asig1, 1,
1204 &asig0, &asig1);
1205 shift128RightJamming(bsig0, bsig1, 1,
1206 &bsig0, &bsig1);
1207 ++aexp;
1208 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1209 } else {
1210 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1211 asign = bsign;
1212 }
1213 }
1214 env->fp_status.float_rounding_mode = save_mode;
1215 /* This result is inexact. */
1216 asig1 |= 1;
1217 ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
1218 &env->fp_status);
1219 }
1220
1221 env->fp_status.floatx80_rounding_precision = save_prec;
1222 }
1223 merge_exception_flags(env, old_flags);
1224 }
1225
1226 void helper_fyl2x(CPUX86State *env)
1227 {
1228 double fptemp = floatx80_to_double(env, ST0);
1229
1230 if (fptemp > 0.0) {
1231 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
1232 fptemp *= floatx80_to_double(env, ST1);
1233 ST1 = double_to_floatx80(env, fptemp);
1234 fpop(env);
1235 } else {
1236 env->fpus &= ~0x4700;
1237 env->fpus |= 0x400;
1238 }
1239 }
1240
1241 void helper_fptan(CPUX86State *env)
1242 {
1243 double fptemp = floatx80_to_double(env, ST0);
1244
1245 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1246 env->fpus |= 0x400;
1247 } else {
1248 fptemp = tan(fptemp);
1249 ST0 = double_to_floatx80(env, fptemp);
1250 fpush(env);
1251 ST0 = floatx80_one;
1252 env->fpus &= ~0x400; /* C2 <-- 0 */
1253 /* the above code is for |arg| < 2**52 only */
1254 }
1255 }
1256
1257 void helper_fpatan(CPUX86State *env)
1258 {
1259 double fptemp, fpsrcop;
1260
1261 fpsrcop = floatx80_to_double(env, ST1);
1262 fptemp = floatx80_to_double(env, ST0);
1263 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
1264 fpop(env);
1265 }
1266
1267 void helper_fxtract(CPUX86State *env)
1268 {
1269 uint8_t old_flags = save_exception_flags(env);
1270 CPU_LDoubleU temp;
1271
1272 temp.d = ST0;
1273
1274 if (floatx80_is_zero(ST0)) {
1275 /* Easy way to generate -inf and raising division by 0 exception */
1276 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1277 &env->fp_status);
1278 fpush(env);
1279 ST0 = temp.d;
1280 } else if (floatx80_invalid_encoding(ST0)) {
1281 float_raise(float_flag_invalid, &env->fp_status);
1282 ST0 = floatx80_default_nan(&env->fp_status);
1283 fpush(env);
1284 ST0 = ST1;
1285 } else if (floatx80_is_any_nan(ST0)) {
1286 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1287 float_raise(float_flag_invalid, &env->fp_status);
1288 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1289 }
1290 fpush(env);
1291 ST0 = ST1;
1292 } else if (floatx80_is_infinity(ST0)) {
1293 fpush(env);
1294 ST0 = ST1;
1295 ST1 = floatx80_infinity;
1296 } else {
1297 int expdif;
1298
1299 if (EXPD(temp) == 0) {
1300 int shift = clz64(temp.l.lower);
1301 temp.l.lower <<= shift;
1302 expdif = 1 - EXPBIAS - shift;
1303 float_raise(float_flag_input_denormal, &env->fp_status);
1304 } else {
1305 expdif = EXPD(temp) - EXPBIAS;
1306 }
1307 /* DP exponent bias */
1308 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1309 fpush(env);
1310 BIASEXPONENT(temp);
1311 ST0 = temp.d;
1312 }
1313 merge_exception_flags(env, old_flags);
1314 }
1315
1316 void helper_fprem1(CPUX86State *env)
1317 {
1318 double st0, st1, dblq, fpsrcop, fptemp;
1319 CPU_LDoubleU fpsrcop1, fptemp1;
1320 int expdif;
1321 signed long long int q;
1322
1323 st0 = floatx80_to_double(env, ST0);
1324 st1 = floatx80_to_double(env, ST1);
1325
1326 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
1327 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
1328 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1329 return;
1330 }
1331
1332 fpsrcop = st0;
1333 fptemp = st1;
1334 fpsrcop1.d = ST0;
1335 fptemp1.d = ST1;
1336 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
1337
1338 if (expdif < 0) {
1339 /* optimisation? taken from the AMD docs */
1340 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1341 /* ST0 is unchanged */
1342 return;
1343 }
1344
1345 if (expdif < 53) {
1346 dblq = fpsrcop / fptemp;
1347 /* round dblq towards nearest integer */
1348 dblq = rint(dblq);
1349 st0 = fpsrcop - fptemp * dblq;
1350
1351 /* convert dblq to q by truncating towards zero */
1352 if (dblq < 0.0) {
1353 q = (signed long long int)(-dblq);
1354 } else {
1355 q = (signed long long int)dblq;
1356 }
1357
1358 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1359 /* (C0,C3,C1) <-- (q2,q1,q0) */
1360 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
1361 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
1362 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
1363 } else {
1364 env->fpus |= 0x400; /* C2 <-- 1 */
1365 fptemp = pow(2.0, expdif - 50);
1366 fpsrcop = (st0 / st1) / fptemp;
1367 /* fpsrcop = integer obtained by chopping */
1368 fpsrcop = (fpsrcop < 0.0) ?
1369 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
1370 st0 -= (st1 * fpsrcop * fptemp);
1371 }
1372 ST0 = double_to_floatx80(env, st0);
1373 }
1374
1375 void helper_fprem(CPUX86State *env)
1376 {
1377 double st0, st1, dblq, fpsrcop, fptemp;
1378 CPU_LDoubleU fpsrcop1, fptemp1;
1379 int expdif;
1380 signed long long int q;
1381
1382 st0 = floatx80_to_double(env, ST0);
1383 st1 = floatx80_to_double(env, ST1);
1384
1385 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
1386 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
1387 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1388 return;
1389 }
1390
1391 fpsrcop = st0;
1392 fptemp = st1;
1393 fpsrcop1.d = ST0;
1394 fptemp1.d = ST1;
1395 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
1396
1397 if (expdif < 0) {
1398 /* optimisation? taken from the AMD docs */
1399 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1400 /* ST0 is unchanged */
1401 return;
1402 }
1403
1404 if (expdif < 53) {
1405 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
1406 /* round dblq towards zero */
1407 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
1408 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
1409
1410 /* convert dblq to q by truncating towards zero */
1411 if (dblq < 0.0) {
1412 q = (signed long long int)(-dblq);
1413 } else {
1414 q = (signed long long int)dblq;
1415 }
1416
1417 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1418 /* (C0,C3,C1) <-- (q2,q1,q0) */
1419 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
1420 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
1421 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
1422 } else {
1423 int N = 32 + (expdif % 32); /* as per AMD docs */
1424
1425 env->fpus |= 0x400; /* C2 <-- 1 */
1426 fptemp = pow(2.0, (double)(expdif - N));
1427 fpsrcop = (st0 / st1) / fptemp;
1428 /* fpsrcop = integer obtained by chopping */
1429 fpsrcop = (fpsrcop < 0.0) ?
1430 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
1431 st0 -= (st1 * fpsrcop * fptemp);
1432 }
1433 ST0 = double_to_floatx80(env, st0);
1434 }
1435
1436 void helper_fyl2xp1(CPUX86State *env)
1437 {
1438 double fptemp = floatx80_to_double(env, ST0);
1439
1440 if ((fptemp + 1.0) > 0.0) {
1441 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
1442 fptemp *= floatx80_to_double(env, ST1);
1443 ST1 = double_to_floatx80(env, fptemp);
1444 fpop(env);
1445 } else {
1446 env->fpus &= ~0x4700;
1447 env->fpus |= 0x400;
1448 }
1449 }
1450
1451 void helper_fsqrt(CPUX86State *env)
1452 {
1453 uint8_t old_flags = save_exception_flags(env);
1454 if (floatx80_is_neg(ST0)) {
1455 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1456 env->fpus |= 0x400;
1457 }
1458 ST0 = floatx80_sqrt(ST0, &env->fp_status);
1459 merge_exception_flags(env, old_flags);
1460 }
1461
1462 void helper_fsincos(CPUX86State *env)
1463 {
1464 double fptemp = floatx80_to_double(env, ST0);
1465
1466 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1467 env->fpus |= 0x400;
1468 } else {
1469 ST0 = double_to_floatx80(env, sin(fptemp));
1470 fpush(env);
1471 ST0 = double_to_floatx80(env, cos(fptemp));
1472 env->fpus &= ~0x400; /* C2 <-- 0 */
1473 /* the above code is for |arg| < 2**63 only */
1474 }
1475 }
1476
1477 void helper_frndint(CPUX86State *env)
1478 {
1479 uint8_t old_flags = save_exception_flags(env);
1480 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1481 merge_exception_flags(env, old_flags);
1482 }
1483
1484 void helper_fscale(CPUX86State *env)
1485 {
1486 uint8_t old_flags = save_exception_flags(env);
1487 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
1488 float_raise(float_flag_invalid, &env->fp_status);
1489 ST0 = floatx80_default_nan(&env->fp_status);
1490 } else if (floatx80_is_any_nan(ST1)) {
1491 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1492 float_raise(float_flag_invalid, &env->fp_status);
1493 }
1494 ST0 = ST1;
1495 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1496 float_raise(float_flag_invalid, &env->fp_status);
1497 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1498 }
1499 } else if (floatx80_is_infinity(ST1) &&
1500 !floatx80_invalid_encoding(ST0) &&
1501 !floatx80_is_any_nan(ST0)) {
1502 if (floatx80_is_neg(ST1)) {
1503 if (floatx80_is_infinity(ST0)) {
1504 float_raise(float_flag_invalid, &env->fp_status);
1505 ST0 = floatx80_default_nan(&env->fp_status);
1506 } else {
1507 ST0 = (floatx80_is_neg(ST0) ?
1508 floatx80_chs(floatx80_zero) :
1509 floatx80_zero);
1510 }
1511 } else {
1512 if (floatx80_is_zero(ST0)) {
1513 float_raise(float_flag_invalid, &env->fp_status);
1514 ST0 = floatx80_default_nan(&env->fp_status);
1515 } else {
1516 ST0 = (floatx80_is_neg(ST0) ?
1517 floatx80_chs(floatx80_infinity) :
1518 floatx80_infinity);
1519 }
1520 }
1521 } else {
1522 int n;
1523 signed char save = env->fp_status.floatx80_rounding_precision;
1524 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
1525 set_float_exception_flags(0, &env->fp_status);
1526 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1527 set_float_exception_flags(save_flags, &env->fp_status);
1528 env->fp_status.floatx80_rounding_precision = 80;
1529 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1530 env->fp_status.floatx80_rounding_precision = save;
1531 }
1532 merge_exception_flags(env, old_flags);
1533 }
1534
1535 void helper_fsin(CPUX86State *env)
1536 {
1537 double fptemp = floatx80_to_double(env, ST0);
1538
1539 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1540 env->fpus |= 0x400;
1541 } else {
1542 ST0 = double_to_floatx80(env, sin(fptemp));
1543 env->fpus &= ~0x400; /* C2 <-- 0 */
1544 /* the above code is for |arg| < 2**53 only */
1545 }
1546 }
1547
1548 void helper_fcos(CPUX86State *env)
1549 {
1550 double fptemp = floatx80_to_double(env, ST0);
1551
1552 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1553 env->fpus |= 0x400;
1554 } else {
1555 ST0 = double_to_floatx80(env, cos(fptemp));
1556 env->fpus &= ~0x400; /* C2 <-- 0 */
1557 /* the above code is for |arg| < 2**63 only */
1558 }
1559 }
1560
1561 void helper_fxam_ST0(CPUX86State *env)
1562 {
1563 CPU_LDoubleU temp;
1564 int expdif;
1565
1566 temp.d = ST0;
1567
1568 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1569 if (SIGND(temp)) {
1570 env->fpus |= 0x200; /* C1 <-- 1 */
1571 }
1572
1573 if (env->fptags[env->fpstt]) {
1574 env->fpus |= 0x4100; /* Empty */
1575 return;
1576 }
1577
1578 expdif = EXPD(temp);
1579 if (expdif == MAXEXPD) {
1580 if (MANTD(temp) == 0x8000000000000000ULL) {
1581 env->fpus |= 0x500; /* Infinity */
1582 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1583 env->fpus |= 0x100; /* NaN */
1584 }
1585 } else if (expdif == 0) {
1586 if (MANTD(temp) == 0) {
1587 env->fpus |= 0x4000; /* Zero */
1588 } else {
1589 env->fpus |= 0x4400; /* Denormal */
1590 }
1591 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1592 env->fpus |= 0x400;
1593 }
1594 }
1595
1596 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1597 uintptr_t retaddr)
1598 {
1599 int fpus, fptag, exp, i;
1600 uint64_t mant;
1601 CPU_LDoubleU tmp;
1602
1603 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1604 fptag = 0;
1605 for (i = 7; i >= 0; i--) {
1606 fptag <<= 2;
1607 if (env->fptags[i]) {
1608 fptag |= 3;
1609 } else {
1610 tmp.d = env->fpregs[i].d;
1611 exp = EXPD(tmp);
1612 mant = MANTD(tmp);
1613 if (exp == 0 && mant == 0) {
1614 /* zero */
1615 fptag |= 1;
1616 } else if (exp == 0 || exp == MAXEXPD
1617 || (mant & (1LL << 63)) == 0) {
1618 /* NaNs, infinity, denormal */
1619 fptag |= 2;
1620 }
1621 }
1622 }
1623 if (data32) {
1624 /* 32 bit */
1625 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1626 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1627 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1628 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1629 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1630 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1631 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1632 } else {
1633 /* 16 bit */
1634 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1635 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1636 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1637 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1638 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1639 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1640 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1641 }
1642 }
1643
1644 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1645 {
1646 do_fstenv(env, ptr, data32, GETPC());
1647 }
1648
1649 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1650 {
1651 env->fpstt = (fpus >> 11) & 7;
1652 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1653 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1654 #if !defined(CONFIG_USER_ONLY)
1655 if (!(env->fpus & FPUS_SE)) {
1656 /*
1657 * Here the processor deasserts FERR#; in response, the chipset deasserts
1658 * IGNNE#.
1659 */
1660 cpu_clear_ignne();
1661 }
1662 #endif
1663 }
1664
1665 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1666 uintptr_t retaddr)
1667 {
1668 int i, fpus, fptag;
1669
1670 if (data32) {
1671 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1672 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1673 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1674 } else {
1675 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1676 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1677 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1678 }
1679 cpu_set_fpus(env, fpus);
1680 for (i = 0; i < 8; i++) {
1681 env->fptags[i] = ((fptag & 3) == 3);
1682 fptag >>= 2;
1683 }
1684 }
1685
1686 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1687 {
1688 do_fldenv(env, ptr, data32, GETPC());
1689 }
1690
1691 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1692 {
1693 floatx80 tmp;
1694 int i;
1695
1696 do_fstenv(env, ptr, data32, GETPC());
1697
1698 ptr += (14 << data32);
1699 for (i = 0; i < 8; i++) {
1700 tmp = ST(i);
1701 helper_fstt(env, tmp, ptr, GETPC());
1702 ptr += 10;
1703 }
1704
1705 /* fninit */
1706 env->fpus = 0;
1707 env->fpstt = 0;
1708 cpu_set_fpuc(env, 0x37f);
1709 env->fptags[0] = 1;
1710 env->fptags[1] = 1;
1711 env->fptags[2] = 1;
1712 env->fptags[3] = 1;
1713 env->fptags[4] = 1;
1714 env->fptags[5] = 1;
1715 env->fptags[6] = 1;
1716 env->fptags[7] = 1;
1717 }
1718
1719 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1720 {
1721 floatx80 tmp;
1722 int i;
1723
1724 do_fldenv(env, ptr, data32, GETPC());
1725 ptr += (14 << data32);
1726
1727 for (i = 0; i < 8; i++) {
1728 tmp = helper_fldt(env, ptr, GETPC());
1729 ST(i) = tmp;
1730 ptr += 10;
1731 }
1732 }
1733
1734 #if defined(CONFIG_USER_ONLY)
1735 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1736 {
1737 helper_fsave(env, ptr, data32);
1738 }
1739
1740 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1741 {
1742 helper_frstor(env, ptr, data32);
1743 }
1744 #endif
1745
1746 #define XO(X) offsetof(X86XSaveArea, X)
1747
1748 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1749 {
1750 int fpus, fptag, i;
1751 target_ulong addr;
1752
1753 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1754 fptag = 0;
1755 for (i = 0; i < 8; i++) {
1756 fptag |= (env->fptags[i] << i);
1757 }
1758
1759 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1760 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1761 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1762
1763 /* In 32-bit mode this is eip, sel, dp, sel.
1764 In 64-bit mode this is rip, rdp.
1765 But in either case we don't write actual data, just zeros. */
1766 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1767 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1768
1769 addr = ptr + XO(legacy.fpregs);
1770 for (i = 0; i < 8; i++) {
1771 floatx80 tmp = ST(i);
1772 helper_fstt(env, tmp, addr, ra);
1773 addr += 16;
1774 }
1775 }
1776
1777 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1778 {
1779 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1780 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1781 }
1782
1783 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1784 {
1785 int i, nb_xmm_regs;
1786 target_ulong addr;
1787
1788 if (env->hflags & HF_CS64_MASK) {
1789 nb_xmm_regs = 16;
1790 } else {
1791 nb_xmm_regs = 8;
1792 }
1793
1794 addr = ptr + XO(legacy.xmm_regs);
1795 for (i = 0; i < nb_xmm_regs; i++) {
1796 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1797 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1798 addr += 16;
1799 }
1800 }
1801
1802 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1803 {
1804 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1805 int i;
1806
1807 for (i = 0; i < 4; i++, addr += 16) {
1808 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1809 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1810 }
1811 }
1812
1813 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1814 {
1815 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1816 env->bndcs_regs.cfgu, ra);
1817 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1818 env->bndcs_regs.sts, ra);
1819 }
1820
1821 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1822 {
1823 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1824 }
1825
1826 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1827 {
1828 uintptr_t ra = GETPC();
1829
1830 /* The operand must be 16 byte aligned */
1831 if (ptr & 0xf) {
1832 raise_exception_ra(env, EXCP0D_GPF, ra);
1833 }
1834
1835 do_xsave_fpu(env, ptr, ra);
1836
1837 if (env->cr[4] & CR4_OSFXSR_MASK) {
1838 do_xsave_mxcsr(env, ptr, ra);
1839 /* Fast FXSAVE leaves out the XMM registers */
1840 if (!(env->efer & MSR_EFER_FFXSR)
1841 || (env->hflags & HF_CPL_MASK)
1842 || !(env->hflags & HF_LMA_MASK)) {
1843 do_xsave_sse(env, ptr, ra);
1844 }
1845 }
1846 }
1847
1848 static uint64_t get_xinuse(CPUX86State *env)
1849 {
1850 uint64_t inuse = -1;
1851
1852 /* For the most part, we don't track XINUSE. We could calculate it
1853 here for all components, but it's probably less work to simply
1854 indicate in use. That said, the state of BNDREGS is important
1855 enough to track in HFLAGS, so we might as well use that here. */
1856 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1857 inuse &= ~XSTATE_BNDREGS_MASK;
1858 }
1859 return inuse;
1860 }
1861
1862 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1863 uint64_t inuse, uint64_t opt, uintptr_t ra)
1864 {
1865 uint64_t old_bv, new_bv;
1866
1867 /* The OS must have enabled XSAVE. */
1868 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1869 raise_exception_ra(env, EXCP06_ILLOP, ra);
1870 }
1871
1872 /* The operand must be 64 byte aligned. */
1873 if (ptr & 63) {
1874 raise_exception_ra(env, EXCP0D_GPF, ra);
1875 }
1876
1877 /* Never save anything not enabled by XCR0. */
1878 rfbm &= env->xcr0;
1879 opt &= rfbm;
1880
1881 if (opt & XSTATE_FP_MASK) {
1882 do_xsave_fpu(env, ptr, ra);
1883 }
1884 if (rfbm & XSTATE_SSE_MASK) {
1885 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1886 do_xsave_mxcsr(env, ptr, ra);
1887 }
1888 if (opt & XSTATE_SSE_MASK) {
1889 do_xsave_sse(env, ptr, ra);
1890 }
1891 if (opt & XSTATE_BNDREGS_MASK) {
1892 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1893 }
1894 if (opt & XSTATE_BNDCSR_MASK) {
1895 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1896 }
1897 if (opt & XSTATE_PKRU_MASK) {
1898 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1899 }
1900
1901 /* Update the XSTATE_BV field. */
1902 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1903 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1904 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1905 }
1906
1907 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1908 {
1909 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1910 }
1911
1912 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1913 {
1914 uint64_t inuse = get_xinuse(env);
1915 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1916 }
1917
1918 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1919 {
1920 int i, fpuc, fpus, fptag;
1921 target_ulong addr;
1922
1923 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1924 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1925 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1926 cpu_set_fpuc(env, fpuc);
1927 cpu_set_fpus(env, fpus);
1928 fptag ^= 0xff;
1929 for (i = 0; i < 8; i++) {
1930 env->fptags[i] = ((fptag >> i) & 1);
1931 }
1932
1933 addr = ptr + XO(legacy.fpregs);
1934 for (i = 0; i < 8; i++) {
1935 floatx80 tmp = helper_fldt(env, addr, ra);
1936 ST(i) = tmp;
1937 addr += 16;
1938 }
1939 }
1940
1941 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1942 {
1943 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1944 }
1945
1946 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1947 {
1948 int i, nb_xmm_regs;
1949 target_ulong addr;
1950
1951 if (env->hflags & HF_CS64_MASK) {
1952 nb_xmm_regs = 16;
1953 } else {
1954 nb_xmm_regs = 8;
1955 }
1956
1957 addr = ptr + XO(legacy.xmm_regs);
1958 for (i = 0; i < nb_xmm_regs; i++) {
1959 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1960 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1961 addr += 16;
1962 }
1963 }
1964
1965 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1966 {
1967 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1968 int i;
1969
1970 for (i = 0; i < 4; i++, addr += 16) {
1971 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1972 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1973 }
1974 }
1975
1976 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1977 {
1978 /* FIXME: Extend highest implemented bit of linear address. */
1979 env->bndcs_regs.cfgu
1980 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1981 env->bndcs_regs.sts
1982 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1983 }
1984
1985 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1986 {
1987 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1988 }
1989
1990 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1991 {
1992 uintptr_t ra = GETPC();
1993
1994 /* The operand must be 16 byte aligned */
1995 if (ptr & 0xf) {
1996 raise_exception_ra(env, EXCP0D_GPF, ra);
1997 }
1998
1999 do_xrstor_fpu(env, ptr, ra);
2000
2001 if (env->cr[4] & CR4_OSFXSR_MASK) {
2002 do_xrstor_mxcsr(env, ptr, ra);
2003 /* Fast FXRSTOR leaves out the XMM registers */
2004 if (!(env->efer & MSR_EFER_FFXSR)
2005 || (env->hflags & HF_CPL_MASK)
2006 || !(env->hflags & HF_LMA_MASK)) {
2007 do_xrstor_sse(env, ptr, ra);
2008 }
2009 }
2010 }
2011
2012 #if defined(CONFIG_USER_ONLY)
2013 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
2014 {
2015 helper_fxsave(env, ptr);
2016 }
2017
2018 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
2019 {
2020 helper_fxrstor(env, ptr);
2021 }
2022 #endif
2023
2024 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2025 {
2026 uintptr_t ra = GETPC();
2027 uint64_t xstate_bv, xcomp_bv, reserve0;
2028
2029 rfbm &= env->xcr0;
2030
2031 /* The OS must have enabled XSAVE. */
2032 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2033 raise_exception_ra(env, EXCP06_ILLOP, ra);
2034 }
2035
2036 /* The operand must be 64 byte aligned. */
2037 if (ptr & 63) {
2038 raise_exception_ra(env, EXCP0D_GPF, ra);
2039 }
2040
2041 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2042
2043 if ((int64_t)xstate_bv < 0) {
2044 /* FIXME: Compact form. */
2045 raise_exception_ra(env, EXCP0D_GPF, ra);
2046 }
2047
2048 /* Standard form. */
2049
2050 /* The XSTATE_BV field must not set bits not present in XCR0. */
2051 if (xstate_bv & ~env->xcr0) {
2052 raise_exception_ra(env, EXCP0D_GPF, ra);
2053 }
2054
2055 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2056 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2057 describes only XCOMP_BV, but the description of the standard form
2058 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2059 includes the next 64-bit field. */
2060 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2061 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2062 if (xcomp_bv || reserve0) {
2063 raise_exception_ra(env, EXCP0D_GPF, ra);
2064 }
2065
2066 if (rfbm & XSTATE_FP_MASK) {
2067 if (xstate_bv & XSTATE_FP_MASK) {
2068 do_xrstor_fpu(env, ptr, ra);
2069 } else {
2070 helper_fninit(env);
2071 memset(env->fpregs, 0, sizeof(env->fpregs));
2072 }
2073 }
2074 if (rfbm & XSTATE_SSE_MASK) {
2075 /* Note that the standard form of XRSTOR loads MXCSR from memory
2076 whether or not the XSTATE_BV bit is set. */
2077 do_xrstor_mxcsr(env, ptr, ra);
2078 if (xstate_bv & XSTATE_SSE_MASK) {
2079 do_xrstor_sse(env, ptr, ra);
2080 } else {
2081 /* ??? When AVX is implemented, we may have to be more
2082 selective in the clearing. */
2083 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2084 }
2085 }
2086 if (rfbm & XSTATE_BNDREGS_MASK) {
2087 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2088 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2089 env->hflags |= HF_MPX_IU_MASK;
2090 } else {
2091 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2092 env->hflags &= ~HF_MPX_IU_MASK;
2093 }
2094 }
2095 if (rfbm & XSTATE_BNDCSR_MASK) {
2096 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2097 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2098 } else {
2099 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2100 }
2101 cpu_sync_bndcs_hflags(env);
2102 }
2103 if (rfbm & XSTATE_PKRU_MASK) {
2104 uint64_t old_pkru = env->pkru;
2105 if (xstate_bv & XSTATE_PKRU_MASK) {
2106 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2107 } else {
2108 env->pkru = 0;
2109 }
2110 if (env->pkru != old_pkru) {
2111 CPUState *cs = env_cpu(env);
2112 tlb_flush(cs);
2113 }
2114 }
2115 }
2116
2117 #undef XO
2118
2119 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2120 {
2121 /* The OS must have enabled XSAVE. */
2122 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2123 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2124 }
2125
2126 switch (ecx) {
2127 case 0:
2128 return env->xcr0;
2129 case 1:
2130 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2131 return env->xcr0 & get_xinuse(env);
2132 }
2133 break;
2134 }
2135 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2136 }
2137
2138 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2139 {
2140 uint32_t dummy, ena_lo, ena_hi;
2141 uint64_t ena;
2142
2143 /* The OS must have enabled XSAVE. */
2144 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2145 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2146 }
2147
2148 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2149 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2150 goto do_gpf;
2151 }
2152
2153 /* Disallow enabling unimplemented features. */
2154 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2155 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2156 if (mask & ~ena) {
2157 goto do_gpf;
2158 }
2159
2160 /* Disallow enabling only half of MPX. */
2161 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2162 & XSTATE_BNDCSR_MASK) {
2163 goto do_gpf;
2164 }
2165
2166 env->xcr0 = mask;
2167 cpu_sync_bndcs_hflags(env);
2168 return;
2169
2170 do_gpf:
2171 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2172 }
2173
2174 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2176
/* MXCSR control bits consumed by update_mxcsr_status() */
#define SSE_DAZ     0x0040  /* denormals are zero */
#define SSE_RC_MASK 0x6000  /* rounding-control field */
#define SSE_RC_NEAR 0x0000  /*   round to nearest even */
#define SSE_RC_DOWN 0x2000  /*   round down */
#define SSE_RC_UP   0x4000  /*   round up */
#define SSE_RC_CHOP 0x6000  /*   round toward zero */
#define SSE_FZ      0x8000  /* flush to zero */
2184
2185 void update_mxcsr_status(CPUX86State *env)
2186 {
2187 uint32_t mxcsr = env->mxcsr;
2188 int rnd_type;
2189
2190 /* set rounding mode */
2191 switch (mxcsr & SSE_RC_MASK) {
2192 default:
2193 case SSE_RC_NEAR:
2194 rnd_type = float_round_nearest_even;
2195 break;
2196 case SSE_RC_DOWN:
2197 rnd_type = float_round_down;
2198 break;
2199 case SSE_RC_UP:
2200 rnd_type = float_round_up;
2201 break;
2202 case SSE_RC_CHOP:
2203 rnd_type = float_round_to_zero;
2204 break;
2205 }
2206 set_float_rounding_mode(rnd_type, &env->sse_status);
2207
2208 /* set denormals are zero */
2209 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2210
2211 /* set flush to zero */
2212 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
2213 }
2214
2215 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
2216 {
2217 cpu_set_mxcsr(env, val);
2218 }
2219
2220 void helper_enter_mmx(CPUX86State *env)
2221 {
2222 env->fpstt = 0;
2223 *(uint32_t *)(env->fptags) = 0;
2224 *(uint32_t *)(env->fptags + 4) = 0;
2225 }
2226
2227 void helper_emms(CPUX86State *env)
2228 {
2229 /* set to empty state */
2230 *(uint32_t *)(env->fptags) = 0x01010101;
2231 *(uint32_t *)(env->fptags + 4) = 0x01010101;
2232 }
2233
2234 /* XXX: suppress */
2235 void helper_movq(CPUX86State *env, void *d, void *s)
2236 {
2237 *(uint64_t *)d = *(uint64_t *)s;
2238 }
2239
2240 #define SHIFT 0
2241 #include "ops_sse.h"
2242
2243 #define SHIFT 1
2244 #include "ops_sse.h"