vmsvga: don't process more than 1024 fifo commands at once
[qemu.git] / target-i386 / fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27
/* x87 control word rounding-control field (bits 10-11) */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* Magnitude limit used by the host-double based transcendental helpers */
#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
/* Force the exponent field to the bias (i.e. unbiased exponent 0) */
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* x87 status word exception flags */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* Exception-mask bits of the control word */
#define FPUC_EM 0x3f

/* Extended-precision encodings of the FLDxx constants */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59
/* Push: decrement TOP (mod 8) and mark the new top-of-stack slot valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: mark the current top-of-stack slot empty and increment TOP (mod 8). */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/* Load an 80-bit extended value from guest memory: 64-bit mantissa at
   ptr, 16-bit sign/exponent at ptr + 8. */
static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
                                   uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

/* Store an 80-bit extended value to guest memory (layout as above). */
static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                               uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}
91
92 /* x87 FPU helpers */
93
/* Narrow a guest floatx80 to a host double, type-punning the float64 bit
   pattern through a union to avoid strict-aliasing problems. */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Widen a host double back to a guest floatx80 (inverse of the above). */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/* Record exception flag(s) in the status word; if any raised exception
   is unmasked in the control word, also set the summary and busy bits. */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

/* Division with x87-style zero-divide flagging before the softfloat op. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    if (floatx80_is_zero(b)) {
        fpu_set_exception(env, FPUS_ZE);
    }
    return floatx80_div(a, b, &env->fp_status);
}

/* Deliver a pending FPU exception: #MF when CR0.NE is set, otherwise
   assert the external FERR line (legacy reporting path). */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        cpu_set_ferr(env);
    }
#endif
}
143
/* Load a 32-bit float operand into the scratch register FT0
   (bit-cast via union, then widen to extended precision). */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
}

/* Load a 64-bit float operand into FT0. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
}

/* Load a 32-bit signed integer operand into FT0 (always exact). */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
170
/* FLD m32fp: push a single-precision value onto the register stack.
   TOP and the tag are only committed after the conversion. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FLD m64fp: push a double-precision value. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILD m32int: push a 32-bit signed integer. */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILD m64int: push a 64-bit signed integer. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}
220
/* FST m32fp: return ST0 narrowed to single precision (raw bits). */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    return u.i;
}

/* FST m64fp: return ST0 narrowed to double precision (raw bits). */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    return u.i;
}

/* FIST m16int: convert ST0 using the current rounding mode; values not
   representable in int16 become -32768, the 16-bit integer indefinite.
   NOTE(review): unlike the 32/64-bit variants below this relies on a
   range check rather than the softfloat invalid flag — presumably
   equivalent here since any invalid result is out of int16 range;
   confirm before unifying the styles. */
int32_t helper_fist_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}

/* FIST m32int: convert ST0 using the current rounding mode.  The invalid
   flag is sampled in isolation (old flags saved/restored) so NaN or
   out-of-range input yields 0x80000000, the 32-bit integer indefinite. */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    int32_t val;
    signed char old_exp_flags;

    old_exp_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
                                | old_exp_flags, &env->fp_status);
    return val;
}

/* FIST m64int: as above, with the 64-bit integer indefinite. */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    int64_t val;
    signed char old_exp_flags;

    old_exp_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
                                | old_exp_flags, &env->fp_status);
    return val;
}

/* FISTT m16int: truncating (round-to-zero) conversion, SSE3 FISTTP. */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}

/* FISTT m32int: truncating conversion. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    return val;
}

/* FISTT m64int: truncating conversion. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    return val;
}
314
/* FLD m80fp: push the 80-bit value at ptr (no conversion, always exact). */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FSTP m80fp (store part): write ST0 to memory in 80-bit format. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}

/* Explicit stack push/pop entry points used by the translator. */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: rotate TOP down without touching tags; clears C3/C2/C1/C0. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: rotate TOP up without touching tags; clears C3/C2/C1/C0. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}
351
/* FPU move */

/* FFREE: tag ST(st_index) as empty without changing its contents. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

/* Copy the scratch operand FT0 into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

/* Copy ST(st_index) into the scratch operand FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

/* FLD ST(i): copy a stack register into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

/* FST ST(i): copy ST0 into a stack register. */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: swap ST0 with ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}
387
/* FPU operations */

/* Map floatx80_compare results (-1 less, 0 equal, 1 greater, 2 unordered),
   indexed by ret + 1, onto the x87 C3/C2/C0 condition-code bits. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: signalling compare of ST0 against FT0, result in C3/C2/C0. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    int ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

/* FUCOM: quiet compare (no invalid on quiet NaNs). */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    int ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

/* Same comparison outcomes mapped onto EFLAGS CF/ZF/PF for FCOMI/FUCOMI. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: signalling compare with result delivered in EFLAGS. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    int ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}

/* FUCOMI: quiet compare with result delivered in EFLAGS. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    int ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}
431
/* Arithmetic between ST0 and the scratch operand FT0; softfloat handles
   rounding and sets exception flags in env->fp_status. */

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
}

/* FSUBR: operands reversed (FT0 - ST0). */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
}

/* Division goes through helper_fdiv so zero-divide is flagged. */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}
499
/* misc FPU operations */

/* FCHS: flip the sign of ST0 (pure bit operation, no exceptions). */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

/* FABS: clear the sign of ST0 (pure bit operation, no exceptions). */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

/* FLD1 .. FLDZ: load the architectural constants into ST0.  The
   translator performs the push; these only write the register. */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

void helper_fldl2t_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2t;
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2e;
}

void helper_fldpi_ST0(CPUX86State *env)
{
    ST0 = floatx80_pi;
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    ST0 = floatx80_lg2;
}

void helper_fldln2_ST0(CPUX86State *env)
{
    ST0 = floatx80_ln2;
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the TOP field (bits 11-13) filled from fpstt. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: return the control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
560
/*
 * Propagate the guest control word (env->fpuc) into softfloat state:
 * the RC field selects the rounding mode, and the PC field (bits 8-9)
 * selects 32/64/80-bit rounding precision.  The reserved PC value 1
 * falls through to the 80-bit default.  rnd_type is reused for both.
 */
void update_fp_status(CPUX86State *env)
{
    int rnd_type;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_type = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->fp_status);
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_type = 32;
        break;
    case 2:
        rnd_type = 64;
        break;
    case 3:
    default:
        rnd_type = 80;
        break;
    }
    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
}
596
/* FLDCW: install a new control word (cpu_set_fpuc also refreshes the
   softfloat rounding state). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear the exception flags (bits 0-7) and the busy bit,
   preserving TOP and the condition codes. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: if the error-summary bit is set, deliver the pending exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: reset to the documented power-on defaults — TOP = 0, all
   exceptions masked (control word 0x37f), every register tagged empty. */
void helper_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}
628
/* BCD ops */

/*
 * FBLD: load an 18-digit packed-BCD integer and push it.  Digit bytes
 * are at ptr..ptr+8 (most significant at ptr + 8, two digits per byte);
 * the sign lives in bit 7 of ptr + 9.  18 decimal digits always fit in
 * a signed 64-bit value, so the int64 conversion is exact.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
650
651 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
652 {
653 int v;
654 target_ulong mem_ref, mem_end;
655 int64_t val;
656
657 val = floatx80_to_int64(ST0, &env->fp_status);
658 mem_ref = ptr;
659 mem_end = mem_ref + 9;
660 if (val < 0) {
661 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
662 val = -val;
663 } else {
664 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
665 }
666 while (mem_ref < mem_end) {
667 if (val == 0) {
668 break;
669 }
670 v = val % 100;
671 val = val / 100;
672 v = ((v / 10) << 4) | (v % 10);
673 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
674 }
675 while (mem_ref < mem_end) {
676 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
677 }
678 }
679
/* F2XM1: ST0 := 2^ST0 - 1, computed through host doubles, so precision
   is limited to double rather than full extended precision. */
void helper_f2xm1(CPUX86State *env)
{
    double val = floatx80_to_double(env, ST0);

    val = pow(2.0, val) - 1.0;
    ST0 = double_to_floatx80(env, val);
}

/* FYL2X: ST1 := ST1 * log2(ST0), then pop.  For non-positive ST0 the
   stack is left alone and C2 is set instead. */
void helper_fyl2x(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if (fptemp > 0.0) {
        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

/* FPTAN: ST0 := tan(ST0), then push 1.0.  Arguments beyond the
   reduction range only set C2 and leave the stack untouched. */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* FPATAN: ST1 := atan2(ST1, ST0), then pop (host-double precision). */
void helper_fpatan(CPUX86State *env)
{
    double fptemp, fpsrcop;

    fpsrcop = floatx80_to_double(env, ST1);
    fptemp = floatx80_to_double(env, ST0);
    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
    fpop(env);
}
728
/*
 * FXTRACT: split ST0 into unbiased exponent and significand.  After the
 * operation ST1 holds the exponent and ST0 the significand (exponent
 * forced to bias, i.e. value in [1, 2)).  For a zero input the exponent
 * becomes -inf (generated by a 1/0 division so zero-divide is raised)
 * and the significand is the original signed zero.
 */
void helper_fxtract(CPUX86State *env)
{
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else {
        int expdif;

        expdif = EXPD(temp) - EXPBIAS;
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
}
752
/*
 * FPREM1: IEEE partial remainder ST0 := ST0 REM ST1, using a
 * round-to-nearest quotient.  Computed with host doubles.  When the
 * exponent difference is >= 53 only a partial reduction is done and C2
 * is set so the guest loops; otherwise C0/C3/C1 receive the low three
 * quotient bits.  Invalid operands produce a NaN with flags cleared.
 */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
    } else {
        env->fpus |= 0x400; /* C2 <-- 1 */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
811
/*
 * FPREM: legacy partial remainder, identical in structure to FPREM1
 * above except the quotient is truncated towards zero and the partial
 * reduction step uses N = 32 + (expdif % 32) as per the AMD docs.
 */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        env->fpus |= 0x400; /* C2 <-- 1 */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
872
/* FYL2XP1: ST1 := ST1 * log2(ST0 + 1), then pop.  Only ST0 + 1 > 0 is
   checked here; out of domain sets C2 and leaves the stack alone. */
void helper_fyl2xp1(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp + 1.0) > 0.0) {
        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

/* FSQRT: softfloat square root; a negative operand additionally sets C2
   (softfloat itself raises invalid and yields a NaN). */
void helper_fsqrt(CPUX86State *env)
{
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
}

/* FSINCOS: ST0 := sin, push cos.  C2 flags an unreduced argument. */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FRNDINT: round ST0 to an integer using the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
}

/* FSCALE: ST0 := ST0 * 2^trunc(ST1); a NaN in ST1 propagates to ST0. */
void helper_fscale(CPUX86State *env)
{
    if (floatx80_is_any_nan(ST1)) {
        ST0 = ST1;
    } else {
        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
    }
}

/* FSIN via host double; C2 flags an unreduced argument. */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/* FCOS via host double; C2 flags an unreduced argument. */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
952
/*
 * FXAM: classify ST0 into the condition codes.  C1 mirrors the sign;
 * C3/C2/C0 encode the class: NaN 0x100, infinity 0x500, zero 0x4000,
 * denormal 0x4400, normal 0x400.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* XXX: test fptags too */
    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else {
        env->fpus |= 0x400;
    }
}
983
/*
 * Store the FPU environment (FSTENV/FNSTENV core).  The tag word is
 * reconstructed from the register contents using 2-bit tags: 0 valid,
 * 1 zero, 2 special (NaN/inf/denormal/unnormal), 3 empty.  Instruction
 * and operand pointers are stored as zeros (not tracked).  data32
 * selects the 28-byte protected-mode layout vs the 14-byte 16-bit one.
 */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Status word with TOP merged in, as FNSTSW would report it. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}

/* FSTENV entry point: capture the caller's return address for faults. */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}
1036
/*
 * Load the FPU environment (FLDENV core).  Only the low 16 bits of each
 * field matter, so 16-bit loads are used even for the 32-bit layout —
 * just at the wider field offsets.  Tags are collapsed to the internal
 * one-bit empty/valid representation (3 == empty).
 */
static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

/* FLDENV entry point. */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}
1063
1064 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1065 {
1066 floatx80 tmp;
1067 int i;
1068
1069 do_fstenv(env, ptr, data32, GETPC());
1070
1071 ptr += (14 << data32);
1072 for (i = 0; i < 8; i++) {
1073 tmp = ST(i);
1074 helper_fstt(env, tmp, ptr, GETPC());
1075 ptr += 10;
1076 }
1077
1078 /* fninit */
1079 env->fpus = 0;
1080 env->fpstt = 0;
1081 cpu_set_fpuc(env, 0x37f);
1082 env->fptags[0] = 1;
1083 env->fptags[1] = 1;
1084 env->fptags[2] = 1;
1085 env->fptags[3] = 1;
1086 env->fptags[4] = 1;
1087 env->fptags[5] = 1;
1088 env->fptags[6] = 1;
1089 env->fptags[7] = 1;
1090 }
1091
/* FRSTOR: load the environment and then the eight 10-byte registers,
   mirroring the layout written by helper_fsave. */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, GETPC());
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, ptr, GETPC());
        ST(i) = tmp;
        ptr += 10;
    }
}
1106
#if defined(CONFIG_USER_ONLY)
/* User-mode entry points (e.g. for signal-frame handling in linux-user). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1118
/*
 * Write the legacy x87 part of an FXSAVE/XSAVE area: FCW, FSW (with
 * TOP merged in), the abridged 8-bit tag word (inverted so 1 == valid),
 * zeroed instruction/operand pointers, and the eight ST registers at
 * offset 0x20 with a 16-byte stride.
 */
static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }
    cpu_stw_data_ra(env, ptr, env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + 2, fpus, ra);
    cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros. */
    cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        helper_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

/* Write MXCSR and a fixed MXCSR_MASK of 0xffff at their fixed offsets. */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
    cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
}

/* Write the XMM registers at offset 0xa0: 16 of them in 64-bit code
   segments, 8 otherwise. */
static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + 0xa0;
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

/* Write the four MPX bound registers (lower/upper pairs). */
static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

/* Write the MPX BNDCFGU/BNDSTATUS pair. */
static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
}

/* Write the protection-key register. */
static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->pkru, ra);
}
1192
/*
 * FXSAVE: store x87 state, and (when CR4.OSFXSR is set) MXCSR plus the
 * XMM registers.  The "fast FXSAVE" optimisation (EFER.FFXSR at CPL 0
 * in long mode) skips the XMM registers.  Misaligned operands raise #GP.
 */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}
1214
/* Approximate XINUSE: report every component in use except BNDREGS,
   whose in-use state is tracked in hflags. */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}
1228
/*
 * Common implementation of XSAVE and XSAVEOPT.
 *
 * @ptr:   guest linear address of the 64-byte-aligned XSAVE area
 * @rfbm:  requested-feature bitmap (EDX:EAX of the instruction)
 * @inuse: XINUSE mask -- which components hold non-initial state
 * @opt:   components that may be optimized: XSAVEOPT passes XINUSE so
 *         init-state components are skipped; plain XSAVE passes -1
 * @ra:    host return address for exception unwinding
 *
 * Raises #UD if CR4.OSXSAVE is clear, #GP on misalignment.  All checks
 * precede any store, so a faulting XSAVE modifies no guest memory.
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    /* Extended components live at CPUID-enumerated offsets within the
       standard-format area.  */
    if (opt & XSTATE_BNDREGS_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
        do_xsave_bndregs(env, ptr + off, ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
        do_xsave_bndcsr(env, ptr + off, ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
        do_xsave_pkru(env, ptr + off, ra);
    }

    /* Update the XSTATE_BV field: only bits selected by RFBM are
       rewritten (from XINUSE); the rest of the header word at offset
       512 is preserved, as architected.  */
    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
}
1276
/* XSAVE: save all RFBM-selected components (opt == -1 suppresses
   nothing).  GETPC() must be evaluated here, in the outermost helper.  */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}
1281
/* XSAVEOPT: like XSAVE, but components still in their initial state
   (per XINUSE) may be omitted, so XINUSE is passed as both the header
   value and the optimization mask.  */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}
1287
/* Restore the x87 state (control, status and tag words plus ST0-ST7)
   from the legacy FXSAVE-format image at PTR.  Loads are performed in
   layout order so a fault mid-restore matches hardware access order.  */
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpus, fptag;
    target_ulong addr;

    cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
    fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
    fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
    /* Top-of-stack lives in FSW bits 11-13; it is tracked separately in
       fpstt, so mask it out of the stored status word.  */
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    /* FXSAVE stores an abridged tag word: one bit per register, 1 means
       valid.  fptags[] uses 1 == empty, hence the inversion.  */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    /* ST0-ST7 occupy 16-byte slots starting at offset 0x20.  */
    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        floatx80 tmp = helper_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}
1310
1311 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312 {
1313 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1314 }
1315
1316 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1317 {
1318 int i, nb_xmm_regs;
1319 target_ulong addr;
1320
1321 if (env->hflags & HF_CS64_MASK) {
1322 nb_xmm_regs = 16;
1323 } else {
1324 nb_xmm_regs = 8;
1325 }
1326
1327 addr = ptr + 0xa0;
1328 for (i = 0; i < nb_xmm_regs; i++) {
1329 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1330 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1331 addr += 16;
1332 }
1333 }
1334
1335 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336 {
1337 int i;
1338
1339 for (i = 0; i < 4; i++, addr += 16) {
1340 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342 }
1343 }
1344
1345 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1346 {
1347 /* FIXME: Extend highest implemented bit of linear address. */
1348 env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1349 env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1350 }
1351
1352 static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1353 {
1354 env->pkru = cpu_ldq_data_ra(env, addr, ra);
1355 }
1356
/*
 * FXRSTOR: reload x87/MMX state, MXCSR and (usually) the XMM registers
 * from the 512-byte FXSAVE image at PTR.  Raises #GP on misalignment.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        /* ... but only for CPL 0 in long mode with EFER.FFXSR set;
           in every other case the XMM registers are restored too.  */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}
1378
/*
 * XRSTOR: restore processor extended state from the XSAVE area at PTR.
 *
 * @ptr:  guest linear address of the 64-byte-aligned XSAVE area
 * @rfbm: requested-feature bitmap (EDX:EAX); components outside RFBM
 *        (or outside XCR0) are left untouched
 *
 * Components requested in RFBM but absent from the area's XSTATE_BV
 * header field are reset to their architectural init state.  All #UD
 * and #GP checks are performed before any CPU state is modified.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* XSTATE_BV is the first 64-bit word of the XSAVE header (+512).  */
    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP field must be zero.  */
    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
    if (xcomp_bv0 || xcomp_bv1) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Init state: FNINIT plus cleared register file.  */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
            do_xrstor_bndregs(env, ptr + off, ra);
            /* Keep the BNDREGS-in-use flag in HFLAGS in sync (see
               get_xinuse).  */
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
            do_xrstor_bndcsr(env, ptr + off, ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
            do_xrstor_pkru(env, ptr + off, ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            /* PKRU affects page-permission results cached in the TLB,
               so any change requires a full flush.  */
            CPUState *cs = CPU(x86_env_get_cpu(env));
            tlb_flush(cs, 1);
        }
    }
}
1470
1471 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1472 {
1473 /* The OS must have enabled XSAVE. */
1474 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1475 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1476 }
1477
1478 switch (ecx) {
1479 case 0:
1480 return env->xcr0;
1481 case 1:
1482 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1483 return env->xcr0 & get_xinuse(env);
1484 }
1485 break;
1486 }
1487 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1488 }
1489
/*
 * XSETBV: write extended control register ECX.  Only XCR0 (ecx == 0)
 * is implemented.  Raises #UD if CR4.OSXSAVE is clear; #GP for any
 * other register number or an architecturally invalid mask.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features: CPUID leaf 0xD
       subleaf 0 reports the supported XCR0 bits in EDX:EAX.  */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  The multiply shifts the
       BNDREGS bit up to the BNDCSR position, so the XOR leaves the
       BNDCSR bit set exactly when the two MPX bits differ.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    /* MPX enablement may have changed; recompute the cached hflags.  */
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
1525
1526 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1527 {
1528 CPU_LDoubleU temp;
1529
1530 temp.d = f;
1531 *pmant = temp.l.lower;
1532 *pexp = temp.l.upper;
1533 }
1534
1535 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1536 {
1537 CPU_LDoubleU temp;
1538
1539 temp.l.upper = upper;
1540 temp.l.lower = mant;
1541 return temp.d;
1542 }
1543
1544 /* MMX/SSE */
1545 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1546
1547 #define SSE_DAZ 0x0040
1548 #define SSE_RC_MASK 0x6000
1549 #define SSE_RC_NEAR 0x0000
1550 #define SSE_RC_DOWN 0x2000
1551 #define SSE_RC_UP 0x4000
1552 #define SSE_RC_CHOP 0x6000
1553 #define SSE_FZ 0x8000
1554
1555 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1556 {
1557 int rnd_type;
1558
1559 env->mxcsr = mxcsr;
1560
1561 /* set rounding mode */
1562 switch (mxcsr & SSE_RC_MASK) {
1563 default:
1564 case SSE_RC_NEAR:
1565 rnd_type = float_round_nearest_even;
1566 break;
1567 case SSE_RC_DOWN:
1568 rnd_type = float_round_down;
1569 break;
1570 case SSE_RC_UP:
1571 rnd_type = float_round_up;
1572 break;
1573 case SSE_RC_CHOP:
1574 rnd_type = float_round_to_zero;
1575 break;
1576 }
1577 set_float_rounding_mode(rnd_type, &env->sse_status);
1578
1579 /* set denormals are zero */
1580 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1581
1582 /* set flush to zero */
1583 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1584 }
1585
/* Set the x87 control word and resynchronize the softfloat
   rounding/precision state derived from it.  */
void cpu_set_fpuc(CPUX86State *env, uint16_t val)
{
    env->fpuc = val;
    update_fp_status(env);
}
1591
/* LDMXCSR helper: install a new MXCSR value (loaded by the translator).  */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1596
1597 void helper_enter_mmx(CPUX86State *env)
1598 {
1599 env->fpstt = 0;
1600 *(uint32_t *)(env->fptags) = 0;
1601 *(uint32_t *)(env->fptags + 4) = 0;
1602 }
1603
1604 void helper_emms(CPUX86State *env)
1605 {
1606 /* set to empty state */
1607 *(uint32_t *)(env->fptags) = 0x01010101;
1608 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1609 }
1610
1611 /* XXX: suppress */
1612 void helper_movq(CPUX86State *env, void *d, void *s)
1613 {
1614 *(uint64_t *)d = *(uint64_t *)s;
1615 }
1616
1617 #define SHIFT 0
1618 #include "ops_sse.h"
1619
1620 #define SHIFT 1
1621 #include "ops_sse.h"