Merge remote-tracking branch 'remotes/jsnow-gitlab/tags/python-pull-request' into...
[qemu.git] / target / i386 / tcg / fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "fpu/softfloat.h"
25 #include "fpu/softfloat-macros.h"
26 #include "helper-tcg.h"
27
/*
 * x87 register accessors.  Each of these expands to an lvalue and expects
 * a CPU state pointer named "env" to be in scope where it is used.
 */
#define FT0   (env->ft0)                              /* temporary operand */
#define ST0   (env->fpregs[env->fpstt].d)             /* current stack top */
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) /* n slots from top, mod 8 */
#define ST1   ST(1)

/* Rounding-control field of the FPU control word (bits 10 and 11). */
#define FPU_RC_MASK (3 << 10)
#define FPU_RC_NEAR (0 << 10)
#define FPU_RC_DOWN (1 << 10)
#define FPU_RC_UP   (2 << 10)
#define FPU_RC_CHOP (3 << 10)

/* Numerically equal to 2^63. */
#define MAXTAN 9223372036854775808.0
41
/*
 * Field accessors for x86 80-bit extended-precision values.
 *
 * "fp" is an object with an ".l.upper" member (sign bit plus 15-bit biased
 * exponent) and an ".l.lower" member (64-bit significand).  The argument
 * is parenthesised in every expansion so that expressions such as "*p" can
 * be passed safely.
 */
#define MAXEXPD 0x7fff  /* largest biased exponent (all exponent bits set) */
#define EXPBIAS 16383   /* exponent bias */
#define EXPD(fp)  ((fp).l.upper & 0x7fff)   /* biased exponent */
#define SIGND(fp) ((fp).l.upper & 0x8000)   /* non-zero when negative */
#define MANTD(fp) ((fp).l.lower)            /* 64-bit significand */
/*
 * Overwrite the exponent field with EXPBIAS, keeping the sign bit.
 * Note: evaluates "fp" twice.
 */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
49
/* FPU status word bits. */
#define FPUS_IE 0x0001  /* invalid operation */
#define FPUS_DE 0x0002  /* denormal operand */
#define FPUS_ZE 0x0004  /* divide by zero */
#define FPUS_OE 0x0008  /* overflow */
#define FPUS_UE 0x0010  /* underflow */
#define FPUS_PE 0x0020  /* precision (inexact result) */
#define FPUS_SF 0x0040  /* bit 6; "stack fault" in x87 naming */
#define FPUS_SE 0x0080  /* summary: an unmasked exception is pending */
#define FPUS_B  0x8000  /* bit 15; set together with FPUS_SE */

/* Low six bits of the control word: one mask bit per exception above. */
#define FPUC_EM 0x3f
61
/*
 * Extended-precision constants, given as (sign/exponent, significand).
 * A "_d" suffix marks the variant whose last significand bit is one lower
 * (used for the downward/truncating rounding modes), "_u" the variant
 * that is one higher (used when rounding upward).
 */
#define floatx80_lg2   make_floatx80(0x3ffd, 0x9a209a84fbcff799ULL) /* log10(2) */
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798ULL)
#define floatx80_l2e   make_floatx80(0x3fff, 0xb8aa3b295c17f0bcULL) /* log2(e) */
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbULL)
#define floatx80_l2t   make_floatx80(0x4000, 0xd49a784bcd1b8afeULL) /* log2(10) */
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affULL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abULL) /* ln(2) */
#define floatx80_pi_d  make_floatx80(0x4000, 0xc90fdaa22168c234ULL) /* pi */
70
71 static inline void fpush(CPUX86State *env)
72 {
73 env->fpstt = (env->fpstt - 1) & 7;
74 env->fptags[env->fpstt] = 0; /* validate stack entry */
75 }
76
77 static inline void fpop(CPUX86State *env)
78 {
79 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
80 env->fpstt = (env->fpstt + 1) & 7;
81 }
82
83 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
84 {
85 CPU_LDoubleU temp;
86
87 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
88 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
89 return temp.d;
90 }
91
92 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
93 uintptr_t retaddr)
94 {
95 CPU_LDoubleU temp;
96
97 temp.d = f;
98 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
99 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
100 }
101
102 /* x87 FPU helpers */
103
104 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
105 {
106 union {
107 float64 f64;
108 double d;
109 } u;
110
111 u.f64 = floatx80_to_float64(a, &env->fp_status);
112 return u.d;
113 }
114
115 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
116 {
117 union {
118 float64 f64;
119 double d;
120 } u;
121
122 u.d = a;
123 return float64_to_floatx80(u.f64, &env->fp_status);
124 }
125
126 static void fpu_set_exception(CPUX86State *env, int mask)
127 {
128 env->fpus |= mask;
129 if (env->fpus & (~env->fpuc & FPUC_EM)) {
130 env->fpus |= FPUS_SE | FPUS_B;
131 }
132 }
133
134 static inline uint8_t save_exception_flags(CPUX86State *env)
135 {
136 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
137 set_float_exception_flags(0, &env->fp_status);
138 return old_flags;
139 }
140
141 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
142 {
143 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
144 float_raise(old_flags, &env->fp_status);
145 fpu_set_exception(env,
146 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
147 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
148 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
149 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
150 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
151 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
152 }
153
154 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
155 {
156 uint8_t old_flags = save_exception_flags(env);
157 floatx80 ret = floatx80_div(a, b, &env->fp_status);
158 merge_exception_flags(env, old_flags);
159 return ret;
160 }
161
162 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
163 {
164 if (env->cr[0] & CR0_NE_MASK) {
165 raise_exception_ra(env, EXCP10_COPR, retaddr);
166 }
167 #if !defined(CONFIG_USER_ONLY)
168 else {
169 fpu_check_raise_ferr_irq(env);
170 }
171 #endif
172 }
173
174 void helper_flds_FT0(CPUX86State *env, uint32_t val)
175 {
176 uint8_t old_flags = save_exception_flags(env);
177 union {
178 float32 f;
179 uint32_t i;
180 } u;
181
182 u.i = val;
183 FT0 = float32_to_floatx80(u.f, &env->fp_status);
184 merge_exception_flags(env, old_flags);
185 }
186
187 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
188 {
189 uint8_t old_flags = save_exception_flags(env);
190 union {
191 float64 f;
192 uint64_t i;
193 } u;
194
195 u.i = val;
196 FT0 = float64_to_floatx80(u.f, &env->fp_status);
197 merge_exception_flags(env, old_flags);
198 }
199
200 void helper_fildl_FT0(CPUX86State *env, int32_t val)
201 {
202 FT0 = int32_to_floatx80(val, &env->fp_status);
203 }
204
205 void helper_flds_ST0(CPUX86State *env, uint32_t val)
206 {
207 uint8_t old_flags = save_exception_flags(env);
208 int new_fpstt;
209 union {
210 float32 f;
211 uint32_t i;
212 } u;
213
214 new_fpstt = (env->fpstt - 1) & 7;
215 u.i = val;
216 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
219 merge_exception_flags(env, old_flags);
220 }
221
222 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
223 {
224 uint8_t old_flags = save_exception_flags(env);
225 int new_fpstt;
226 union {
227 float64 f;
228 uint64_t i;
229 } u;
230
231 new_fpstt = (env->fpstt - 1) & 7;
232 u.i = val;
233 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
234 env->fpstt = new_fpstt;
235 env->fptags[new_fpstt] = 0; /* validate stack entry */
236 merge_exception_flags(env, old_flags);
237 }
238
239 void helper_fildl_ST0(CPUX86State *env, int32_t val)
240 {
241 int new_fpstt;
242
243 new_fpstt = (env->fpstt - 1) & 7;
244 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
245 env->fpstt = new_fpstt;
246 env->fptags[new_fpstt] = 0; /* validate stack entry */
247 }
248
249 void helper_fildll_ST0(CPUX86State *env, int64_t val)
250 {
251 int new_fpstt;
252
253 new_fpstt = (env->fpstt - 1) & 7;
254 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
255 env->fpstt = new_fpstt;
256 env->fptags[new_fpstt] = 0; /* validate stack entry */
257 }
258
259 uint32_t helper_fsts_ST0(CPUX86State *env)
260 {
261 uint8_t old_flags = save_exception_flags(env);
262 union {
263 float32 f;
264 uint32_t i;
265 } u;
266
267 u.f = floatx80_to_float32(ST0, &env->fp_status);
268 merge_exception_flags(env, old_flags);
269 return u.i;
270 }
271
272 uint64_t helper_fstl_ST0(CPUX86State *env)
273 {
274 uint8_t old_flags = save_exception_flags(env);
275 union {
276 float64 f;
277 uint64_t i;
278 } u;
279
280 u.f = floatx80_to_float64(ST0, &env->fp_status);
281 merge_exception_flags(env, old_flags);
282 return u.i;
283 }
284
285 int32_t helper_fist_ST0(CPUX86State *env)
286 {
287 uint8_t old_flags = save_exception_flags(env);
288 int32_t val;
289
290 val = floatx80_to_int32(ST0, &env->fp_status);
291 if (val != (int16_t)val) {
292 set_float_exception_flags(float_flag_invalid, &env->fp_status);
293 val = -32768;
294 }
295 merge_exception_flags(env, old_flags);
296 return val;
297 }
298
299 int32_t helper_fistl_ST0(CPUX86State *env)
300 {
301 uint8_t old_flags = save_exception_flags(env);
302 int32_t val;
303
304 val = floatx80_to_int32(ST0, &env->fp_status);
305 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
306 val = 0x80000000;
307 }
308 merge_exception_flags(env, old_flags);
309 return val;
310 }
311
312 int64_t helper_fistll_ST0(CPUX86State *env)
313 {
314 uint8_t old_flags = save_exception_flags(env);
315 int64_t val;
316
317 val = floatx80_to_int64(ST0, &env->fp_status);
318 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
319 val = 0x8000000000000000ULL;
320 }
321 merge_exception_flags(env, old_flags);
322 return val;
323 }
324
325 int32_t helper_fistt_ST0(CPUX86State *env)
326 {
327 uint8_t old_flags = save_exception_flags(env);
328 int32_t val;
329
330 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
331 if (val != (int16_t)val) {
332 set_float_exception_flags(float_flag_invalid, &env->fp_status);
333 val = -32768;
334 }
335 merge_exception_flags(env, old_flags);
336 return val;
337 }
338
339 int32_t helper_fisttl_ST0(CPUX86State *env)
340 {
341 uint8_t old_flags = save_exception_flags(env);
342 int32_t val;
343
344 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
345 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
346 val = 0x80000000;
347 }
348 merge_exception_flags(env, old_flags);
349 return val;
350 }
351
352 int64_t helper_fisttll_ST0(CPUX86State *env)
353 {
354 uint8_t old_flags = save_exception_flags(env);
355 int64_t val;
356
357 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
358 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
359 val = 0x8000000000000000ULL;
360 }
361 merge_exception_flags(env, old_flags);
362 return val;
363 }
364
365 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
366 {
367 int new_fpstt;
368
369 new_fpstt = (env->fpstt - 1) & 7;
370 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
371 env->fpstt = new_fpstt;
372 env->fptags[new_fpstt] = 0; /* validate stack entry */
373 }
374
375 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
376 {
377 do_fstt(env, ST0, ptr, GETPC());
378 }
379
380 void helper_fpush(CPUX86State *env)
381 {
382 fpush(env);
383 }
384
385 void helper_fpop(CPUX86State *env)
386 {
387 fpop(env);
388 }
389
390 void helper_fdecstp(CPUX86State *env)
391 {
392 env->fpstt = (env->fpstt - 1) & 7;
393 env->fpus &= ~0x4700;
394 }
395
396 void helper_fincstp(CPUX86State *env)
397 {
398 env->fpstt = (env->fpstt + 1) & 7;
399 env->fpus &= ~0x4700;
400 }
401
402 /* FPU move */
403
404 void helper_ffree_STN(CPUX86State *env, int st_index)
405 {
406 env->fptags[(env->fpstt + st_index) & 7] = 1;
407 }
408
409 void helper_fmov_ST0_FT0(CPUX86State *env)
410 {
411 ST0 = FT0;
412 }
413
414 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
415 {
416 FT0 = ST(st_index);
417 }
418
419 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
420 {
421 ST0 = ST(st_index);
422 }
423
424 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
425 {
426 ST(st_index) = ST0;
427 }
428
429 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
430 {
431 floatx80 tmp;
432
433 tmp = ST(st_index);
434 ST(st_index) = ST0;
435 ST0 = tmp;
436 }
437
438 /* FPU operations */
439
/*
 * Status-word condition bits for an x87 compare, indexed by the
 * FloatRelation result plus one.
 * NOTE(review): the per-entry labels assume FloatRelation is
 * less = -1, equal = 0, greater = 1, unordered = 2 -- confirm against
 * the softfloat headers.
 */
static const int fcom_ccval[4] = {
    0x0100,     /* less:      bit 8 */
    0x4000,     /* equal:     bit 14 */
    0x0000,     /* greater:   none */
    0x4500,     /* unordered: bits 8, 10 and 14 */
};
441
442 void helper_fcom_ST0_FT0(CPUX86State *env)
443 {
444 uint8_t old_flags = save_exception_flags(env);
445 FloatRelation ret;
446
447 ret = floatx80_compare(ST0, FT0, &env->fp_status);
448 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
449 merge_exception_flags(env, old_flags);
450 }
451
452 void helper_fucom_ST0_FT0(CPUX86State *env)
453 {
454 uint8_t old_flags = save_exception_flags(env);
455 FloatRelation ret;
456
457 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
458 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
459 merge_exception_flags(env, old_flags);
460 }
461
462 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
463
464 void helper_fcomi_ST0_FT0(CPUX86State *env)
465 {
466 uint8_t old_flags = save_exception_flags(env);
467 int eflags;
468 FloatRelation ret;
469
470 ret = floatx80_compare(ST0, FT0, &env->fp_status);
471 eflags = cpu_cc_compute_all(env, CC_OP);
472 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
473 CC_SRC = eflags;
474 merge_exception_flags(env, old_flags);
475 }
476
477 void helper_fucomi_ST0_FT0(CPUX86State *env)
478 {
479 uint8_t old_flags = save_exception_flags(env);
480 int eflags;
481 FloatRelation ret;
482
483 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
484 eflags = cpu_cc_compute_all(env, CC_OP);
485 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
486 CC_SRC = eflags;
487 merge_exception_flags(env, old_flags);
488 }
489
490 void helper_fadd_ST0_FT0(CPUX86State *env)
491 {
492 uint8_t old_flags = save_exception_flags(env);
493 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
494 merge_exception_flags(env, old_flags);
495 }
496
497 void helper_fmul_ST0_FT0(CPUX86State *env)
498 {
499 uint8_t old_flags = save_exception_flags(env);
500 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
501 merge_exception_flags(env, old_flags);
502 }
503
504 void helper_fsub_ST0_FT0(CPUX86State *env)
505 {
506 uint8_t old_flags = save_exception_flags(env);
507 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
508 merge_exception_flags(env, old_flags);
509 }
510
511 void helper_fsubr_ST0_FT0(CPUX86State *env)
512 {
513 uint8_t old_flags = save_exception_flags(env);
514 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
515 merge_exception_flags(env, old_flags);
516 }
517
518 void helper_fdiv_ST0_FT0(CPUX86State *env)
519 {
520 ST0 = helper_fdiv(env, ST0, FT0);
521 }
522
523 void helper_fdivr_ST0_FT0(CPUX86State *env)
524 {
525 ST0 = helper_fdiv(env, FT0, ST0);
526 }
527
528 /* fp operations between STN and ST0 */
529
530 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
531 {
532 uint8_t old_flags = save_exception_flags(env);
533 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
534 merge_exception_flags(env, old_flags);
535 }
536
537 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
538 {
539 uint8_t old_flags = save_exception_flags(env);
540 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
541 merge_exception_flags(env, old_flags);
542 }
543
544 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
545 {
546 uint8_t old_flags = save_exception_flags(env);
547 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
548 merge_exception_flags(env, old_flags);
549 }
550
551 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
552 {
553 uint8_t old_flags = save_exception_flags(env);
554 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
555 merge_exception_flags(env, old_flags);
556 }
557
558 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
559 {
560 floatx80 *p;
561
562 p = &ST(st_index);
563 *p = helper_fdiv(env, *p, ST0);
564 }
565
566 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
567 {
568 floatx80 *p;
569
570 p = &ST(st_index);
571 *p = helper_fdiv(env, ST0, *p);
572 }
573
574 /* misc FPU operations */
575 void helper_fchs_ST0(CPUX86State *env)
576 {
577 ST0 = floatx80_chs(ST0);
578 }
579
580 void helper_fabs_ST0(CPUX86State *env)
581 {
582 ST0 = floatx80_abs(ST0);
583 }
584
585 void helper_fld1_ST0(CPUX86State *env)
586 {
587 ST0 = floatx80_one;
588 }
589
590 void helper_fldl2t_ST0(CPUX86State *env)
591 {
592 switch (env->fpuc & FPU_RC_MASK) {
593 case FPU_RC_UP:
594 ST0 = floatx80_l2t_u;
595 break;
596 default:
597 ST0 = floatx80_l2t;
598 break;
599 }
600 }
601
602 void helper_fldl2e_ST0(CPUX86State *env)
603 {
604 switch (env->fpuc & FPU_RC_MASK) {
605 case FPU_RC_DOWN:
606 case FPU_RC_CHOP:
607 ST0 = floatx80_l2e_d;
608 break;
609 default:
610 ST0 = floatx80_l2e;
611 break;
612 }
613 }
614
615 void helper_fldpi_ST0(CPUX86State *env)
616 {
617 switch (env->fpuc & FPU_RC_MASK) {
618 case FPU_RC_DOWN:
619 case FPU_RC_CHOP:
620 ST0 = floatx80_pi_d;
621 break;
622 default:
623 ST0 = floatx80_pi;
624 break;
625 }
626 }
627
628 void helper_fldlg2_ST0(CPUX86State *env)
629 {
630 switch (env->fpuc & FPU_RC_MASK) {
631 case FPU_RC_DOWN:
632 case FPU_RC_CHOP:
633 ST0 = floatx80_lg2_d;
634 break;
635 default:
636 ST0 = floatx80_lg2;
637 break;
638 }
639 }
640
641 void helper_fldln2_ST0(CPUX86State *env)
642 {
643 switch (env->fpuc & FPU_RC_MASK) {
644 case FPU_RC_DOWN:
645 case FPU_RC_CHOP:
646 ST0 = floatx80_ln2_d;
647 break;
648 default:
649 ST0 = floatx80_ln2;
650 break;
651 }
652 }
653
654 void helper_fldz_ST0(CPUX86State *env)
655 {
656 ST0 = floatx80_zero;
657 }
658
659 void helper_fldz_FT0(CPUX86State *env)
660 {
661 FT0 = floatx80_zero;
662 }
663
664 uint32_t helper_fnstsw(CPUX86State *env)
665 {
666 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
667 }
668
669 uint32_t helper_fnstcw(CPUX86State *env)
670 {
671 return env->fpuc;
672 }
673
674 void update_fp_status(CPUX86State *env)
675 {
676 FloatRoundMode rnd_mode;
677 FloatX80RoundPrec rnd_prec;
678
679 /* set rounding mode */
680 switch (env->fpuc & FPU_RC_MASK) {
681 default:
682 case FPU_RC_NEAR:
683 rnd_mode = float_round_nearest_even;
684 break;
685 case FPU_RC_DOWN:
686 rnd_mode = float_round_down;
687 break;
688 case FPU_RC_UP:
689 rnd_mode = float_round_up;
690 break;
691 case FPU_RC_CHOP:
692 rnd_mode = float_round_to_zero;
693 break;
694 }
695 set_float_rounding_mode(rnd_mode, &env->fp_status);
696
697 switch ((env->fpuc >> 8) & 3) {
698 case 0:
699 rnd_prec = floatx80_precision_s;
700 break;
701 case 2:
702 rnd_prec = floatx80_precision_d;
703 break;
704 case 3:
705 default:
706 rnd_prec = floatx80_precision_x;
707 break;
708 }
709 set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
710 }
711
712 void helper_fldcw(CPUX86State *env, uint32_t val)
713 {
714 cpu_set_fpuc(env, val);
715 }
716
717 void helper_fclex(CPUX86State *env)
718 {
719 env->fpus &= 0x7f00;
720 }
721
722 void helper_fwait(CPUX86State *env)
723 {
724 if (env->fpus & FPUS_SE) {
725 fpu_raise_exception(env, GETPC());
726 }
727 }
728
729 void helper_fninit(CPUX86State *env)
730 {
731 env->fpus = 0;
732 env->fpstt = 0;
733 cpu_set_fpuc(env, 0x37f);
734 env->fptags[0] = 1;
735 env->fptags[1] = 1;
736 env->fptags[2] = 1;
737 env->fptags[3] = 1;
738 env->fptags[4] = 1;
739 env->fptags[5] = 1;
740 env->fptags[6] = 1;
741 env->fptags[7] = 1;
742 }
743
744 /* BCD ops */
745
746 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
747 {
748 floatx80 tmp;
749 uint64_t val;
750 unsigned int v;
751 int i;
752
753 val = 0;
754 for (i = 8; i >= 0; i--) {
755 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
756 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
757 }
758 tmp = int64_to_floatx80(val, &env->fp_status);
759 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
760 tmp = floatx80_chs(tmp);
761 }
762 fpush(env);
763 ST0 = tmp;
764 }
765
766 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
767 {
768 uint8_t old_flags = save_exception_flags(env);
769 int v;
770 target_ulong mem_ref, mem_end;
771 int64_t val;
772 CPU_LDoubleU temp;
773
774 temp.d = ST0;
775
776 val = floatx80_to_int64(ST0, &env->fp_status);
777 mem_ref = ptr;
778 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
779 set_float_exception_flags(float_flag_invalid, &env->fp_status);
780 while (mem_ref < ptr + 7) {
781 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
782 }
783 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
784 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
785 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
786 merge_exception_flags(env, old_flags);
787 return;
788 }
789 mem_end = mem_ref + 9;
790 if (SIGND(temp)) {
791 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
792 val = -val;
793 } else {
794 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
795 }
796 while (mem_ref < mem_end) {
797 if (val == 0) {
798 break;
799 }
800 v = val % 100;
801 val = val / 100;
802 v = ((v / 10) << 4) | (v % 10);
803 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
804 }
805 while (mem_ref < mem_end) {
806 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
807 }
808 merge_exception_flags(env, old_flags);
809 }
810
/*
 * 128-bit significand of log(2), split into its most significant and
 * least significant 64-bit halves.
 */
#define ln2_sig_high    0xb17217f7d1cf79abULL
#define ln2_sig_low     0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * NOTE(review): f2xm1_coeff_0 is ln2_sig_high rounded up by one unit in
 * the last place, and f2xm1_coeff_0_low is a tiny negative value, so the
 * pair presumably represents the leading coefficient in extra precision;
 * confirm against the code that evaluates the polynomial.
 */
#define f2xm1_coeff_0       make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low   make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1       make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2       make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3       make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4       make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5       make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6       make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7       make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
828
829 struct f2xm1_data {
830 /*
831 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
832 * are very close to exact floatx80 values.
833 */
834 floatx80 t;
835 /* The value of 2^t. */
836 floatx80 exp2;
837 /* The value of 2^t - 1. */
838 floatx80 exp2m1;
839 };
840
841 static const struct f2xm1_data f2xm1_table[65] = {
842 { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
843 make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
844 make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
845 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
846 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
847 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
848 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
849 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
850 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
851 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
852 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
853 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
854 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
855 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
856 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
857 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
858 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
859 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
860 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
861 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
862 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
863 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
864 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
865 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
866 { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
867 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
868 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
869 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
870 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
871 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
872 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
873 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
874 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
875 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
876 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
877 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
878 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
879 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
880 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
881 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
882 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
883 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
884 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
885 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
886 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
887 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
888 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
889 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
890 { make_floatx80_init(0xbffe, 0x800000000000227dULL),
891 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
892 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
893 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
894 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
895 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
896 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
897 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
898 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
899 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
900 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
901 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
902 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
903 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
904 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
905 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
906 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
907 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
908 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
909 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
910 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
911 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
912 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
913 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
914 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
915 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
916 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
917 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
918 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
919 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
920 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
921 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
922 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
923 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
924 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
925 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
926 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
927 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
928 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
929 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
930 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
931 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
932 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
933 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
934 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
935 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
936 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
937 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
938 { floatx80_zero_init,
939 make_floatx80_init(0x3fff, 0x8000000000000000ULL),
940 floatx80_zero_init },
941 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
942 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
943 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
944 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
945 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
946 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
947 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
948 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
949 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
950 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
951 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
952 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
953 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
954 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
955 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
956 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
957 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
958 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
959 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
960 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
961 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
962 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
963 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
964 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
965 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
966 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
967 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
968 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
969 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
970 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
971 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
972 make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
973 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
974 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
975 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
976 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
977 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
978 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
979 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
980 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
981 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
982 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
983 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
984 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
985 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
986 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
987 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
988 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
989 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
990 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
991 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
992 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
993 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
994 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
995 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
996 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
997 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
998 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
999 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1000 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1001 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1002 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1003 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1004 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1005 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1006 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1007 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1008 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1009 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1010 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1011 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1012 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1013 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1014 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1015 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1016 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1017 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1018 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1019 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1020 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1021 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1022 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1023 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1024 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1025 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1026 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1027 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1028 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1029 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1030 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1031 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1032 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1033 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1034 { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1035 make_floatx80_init(0x4000, 0x8000000000000000ULL),
1036 make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1037 };
1038
1039 void helper_f2xm1(CPUX86State *env)
1040 {
1041 uint8_t old_flags = save_exception_flags(env);
1042 uint64_t sig = extractFloatx80Frac(ST0);
1043 int32_t exp = extractFloatx80Exp(ST0);
1044 bool sign = extractFloatx80Sign(ST0);
1045
1046 if (floatx80_invalid_encoding(ST0)) {
1047 float_raise(float_flag_invalid, &env->fp_status);
1048 ST0 = floatx80_default_nan(&env->fp_status);
1049 } else if (floatx80_is_any_nan(ST0)) {
1050 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1051 float_raise(float_flag_invalid, &env->fp_status);
1052 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1053 }
1054 } else if (exp > 0x3fff ||
1055 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1056 /* Out of range for the instruction, treat as invalid. */
1057 float_raise(float_flag_invalid, &env->fp_status);
1058 ST0 = floatx80_default_nan(&env->fp_status);
1059 } else if (exp == 0x3fff) {
1060 /* Argument 1 or -1, exact result 1 or -0.5. */
1061 if (sign) {
1062 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1063 }
1064 } else if (exp < 0x3fb0) {
1065 if (!floatx80_is_zero(ST0)) {
1066 /*
1067 * Multiplying the argument by an extra-precision version
1068 * of log(2) is sufficiently precise. Zero arguments are
1069 * returned unchanged.
1070 */
1071 uint64_t sig0, sig1, sig2;
1072 if (exp == 0) {
1073 normalizeFloatx80Subnormal(sig, &exp, &sig);
1074 }
1075 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1076 &sig2);
1077 /* This result is inexact. */
1078 sig1 |= 1;
1079 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1080 sign, exp, sig0, sig1,
1081 &env->fp_status);
1082 }
1083 } else {
1084 floatx80 tmp, y, accum;
1085 bool asign, bsign;
1086 int32_t n, aexp, bexp;
1087 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1088 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1089 FloatX80RoundPrec save_prec =
1090 env->fp_status.floatx80_rounding_precision;
1091 env->fp_status.float_rounding_mode = float_round_nearest_even;
1092 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1093
1094 /* Find the nearest multiple of 1/32 to the argument. */
1095 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1096 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1097 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1098
1099 if (floatx80_is_zero(y)) {
1100 /*
1101 * Use the value of 2^t - 1 from the table, to avoid
1102 * needing to special-case zero as a result of
1103 * multiplication below.
1104 */
1105 ST0 = f2xm1_table[n].t;
1106 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1107 env->fp_status.float_rounding_mode = save_mode;
1108 } else {
1109 /*
1110 * Compute the lower parts of a polynomial expansion for
1111 * (2^y - 1) / y.
1112 */
1113 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1114 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1115 accum = floatx80_mul(accum, y, &env->fp_status);
1116 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1117 accum = floatx80_mul(accum, y, &env->fp_status);
1118 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1119 accum = floatx80_mul(accum, y, &env->fp_status);
1120 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1121 accum = floatx80_mul(accum, y, &env->fp_status);
1122 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1123 accum = floatx80_mul(accum, y, &env->fp_status);
1124 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1125 accum = floatx80_mul(accum, y, &env->fp_status);
1126 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1127
1128 /*
1129 * The full polynomial expansion is f2xm1_coeff_0 + accum
1130 * (where accum has much lower magnitude, and so, in
1131 * particular, carry out of the addition is not possible).
1132 * (This expansion is only accurate to about 70 bits, not
1133 * 128 bits.)
1134 */
1135 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1136 asign = extractFloatx80Sign(f2xm1_coeff_0);
1137 shift128RightJamming(extractFloatx80Frac(accum), 0,
1138 aexp - extractFloatx80Exp(accum),
1139 &asig0, &asig1);
1140 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1141 bsig1 = 0;
1142 if (asign == extractFloatx80Sign(accum)) {
1143 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1144 } else {
1145 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1146 }
1147 /* And thus compute an approximation to 2^y - 1. */
1148 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1149 &asig0, &asig1, &asig2);
1150 aexp += extractFloatx80Exp(y) - 0x3ffe;
1151 asign ^= extractFloatx80Sign(y);
1152 if (n != 32) {
1153 /*
1154 * Multiply this by the precomputed value of 2^t and
1155 * add that of 2^t - 1.
1156 */
1157 mul128By64To192(asig0, asig1,
1158 extractFloatx80Frac(f2xm1_table[n].exp2),
1159 &asig0, &asig1, &asig2);
1160 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1161 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1162 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1163 bsig1 = 0;
1164 if (bexp < aexp) {
1165 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1166 &bsig0, &bsig1);
1167 } else if (aexp < bexp) {
1168 shift128RightJamming(asig0, asig1, bexp - aexp,
1169 &asig0, &asig1);
1170 aexp = bexp;
1171 }
1172 /* The sign of 2^t - 1 is always that of the result. */
1173 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1174 if (asign == bsign) {
1175 /* Avoid possible carry out of the addition. */
1176 shift128RightJamming(asig0, asig1, 1,
1177 &asig0, &asig1);
1178 shift128RightJamming(bsig0, bsig1, 1,
1179 &bsig0, &bsig1);
1180 ++aexp;
1181 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1182 } else {
1183 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1184 asign = bsign;
1185 }
1186 }
1187 env->fp_status.float_rounding_mode = save_mode;
1188 /* This result is inexact. */
1189 asig1 |= 1;
1190 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1191 asign, aexp, asig0, asig1,
1192 &env->fp_status);
1193 }
1194
1195 env->fp_status.floatx80_rounding_precision = save_prec;
1196 }
1197 merge_exception_flags(env, old_flags);
1198 }
1199
1200 void helper_fptan(CPUX86State *env)
1201 {
1202 double fptemp = floatx80_to_double(env, ST0);
1203
1204 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1205 env->fpus |= 0x400;
1206 } else {
1207 fptemp = tan(fptemp);
1208 ST0 = double_to_floatx80(env, fptemp);
1209 fpush(env);
1210 ST0 = floatx80_one;
1211 env->fpus &= ~0x400; /* C2 <-- 0 */
1212 /* the above code is for |arg| < 2**52 only */
1213 }
1214 }
1215
1216 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */
/*
 * Each constant is given as a floatx80 exponent field (*_exp) plus the
 * high and low 64-bit halves of a 128-bit significand (*_sig_high,
 * *_sig_low).  pi/4, pi/2 and pi share the same significand and differ
 * only in the exponent.  helper_fpatan uses these triples directly as
 * (rexp, rsig0, rsig1) or as the adjustment term (adj_exp, adj_sig0,
 * adj_sig1).
 */
1217 #define pi_4_exp 0x3ffe
1218 #define pi_4_sig_high 0xc90fdaa22168c234ULL
1219 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
1220 #define pi_2_exp 0x3fff
1221 #define pi_2_sig_high 0xc90fdaa22168c234ULL
1222 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
1223 #define pi_34_exp 0x4000
1224 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL
1225 #define pi_34_sig_low 0x9394c9e8a0a5159dULL
1226 #define pi_exp 0x4000
1227 #define pi_sig_high 0xc90fdaa22168c234ULL
1228 #define pi_sig_low 0xc4c6628b80dc1cd1ULL
1229
1230 /*
1231 * Polynomial coefficients for an approximation to atan(x), with only
1232 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike
1233 * for some other approximations, no low part is needed for the first
1234 * coefficient here to achieve a sufficiently accurate result, because
1235 * the coefficient in this minimax approximation is very close to
1236 * exactly 1.)
1237 */
/*
 * fpatan_coeff_k belongs to the term in x^(2k+1).  helper_fpatan
 * evaluates coefficients 6 down to 1 by Horner's rule in z^2, then
 * combines that sum with fpatan_coeff_0 and multiplies by z.
 */
1238 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
1239 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
1240 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
1241 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
1242 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
1243 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
1244 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1245
/*
 * One entry of fpatan_table: an arctangent value stored as two floatx80
 * parts.  helper_fpatan aligns atan_low to the exponent of atan_high and
 * adds or subtracts it according to its sign, giving a 128-bit value.
 */
1246 struct fpatan_data {
1247 /* High and low parts of atan(x). */
1248 floatx80 atan_high, atan_low;
1249 };
1250
/*
 * Arctangent table used by helper_fpatan.  It is indexed by n, where
 * t = n/8 is the nearest multiple of 1/8 to the reduced argument x
 * (0 < x <= 1), and entry n is used as arctan(t).  Entry 0 is zero and
 * n ranges from 0 to 8, hence the 9 entries.
 */
1251 static const struct fpatan_data fpatan_table[9] = {
1252 { floatx80_zero_init,
1253 floatx80_zero_init },
1254 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
1255 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
1256 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
1257 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
1258 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
1259 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
1260 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
1261 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
1262 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
1263 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
1264 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
1265 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
1266 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
1267 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
1268 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
1269 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
1270 };
1271
/*
 * FPATAN: compute an arctangent from ST1 and ST0, store it in ST1 and
 * pop the register stack.
 *
 * Structure:
 *  - NaNs and invalid encodings are handled first (invalid is raised
 *    for signaling NaNs and invalid encodings).
 *  - A zero ST1 with non-negative ST0 is passed through unchanged.
 *  - When ST0 is non-negative and much larger than ST1 (or infinite),
 *    the result is ST1 / ST0 up to rounding.
 *  - The remaining special cases produce pi, pi/2, pi/4 or 3pi/4 with
 *    the sign of ST1.
 *  - Otherwise x = min/max of the two magnitudes is formed with 128-bit
 *    precision, split as x = t + y with t = n/8, and arctan(x) is built
 *    from fpatan_table[n] plus a polynomial in z = y/(1+tx); the result
 *    is then used as-is or combined with pi or pi/2 depending on which
 *    operand was larger and on the sign of ST0.
 *
 * Every result computed in the last two groups is flagged inexact.
 * Exception flags are merged into the x87 status word on exit.
 */
1272 void helper_fpatan(CPUX86State *env)
1273 {
1274 uint8_t old_flags = save_exception_flags(env);
1275 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1276 int32_t arg0_exp = extractFloatx80Exp(ST0);
1277 bool arg0_sign = extractFloatx80Sign(ST0);
1278 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1279 int32_t arg1_exp = extractFloatx80Exp(ST1);
1280 bool arg1_sign = extractFloatx80Sign(ST1);
1281
/* Signaling NaNs take priority, ST0 before ST1; the result goes to ST1. */
1282 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1283 float_raise(float_flag_invalid, &env->fp_status);
1284 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1285 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1286 float_raise(float_flag_invalid, &env->fp_status);
1287 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1288 } else if (floatx80_invalid_encoding(ST0) ||
1289 floatx80_invalid_encoding(ST1)) {
1290 float_raise(float_flag_invalid, &env->fp_status);
1291 ST1 = floatx80_default_nan(&env->fp_status);
1292 } else if (floatx80_is_any_nan(ST0)) {
1293 ST1 = ST0;
1294 } else if (floatx80_is_any_nan(ST1)) {
1295 /* Pass this NaN through. */
1296 } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1297 /* Pass this zero through. */
1298 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1299 arg0_exp - arg1_exp >= 80) &&
1300 !arg0_sign) {
1301 /*
1302 * Dividing ST1 by ST0 gives the correct result up to
1303 * rounding, and avoids spurious underflow exceptions that
1304 * might result from passing some small values through the
1305 * polynomial approximation, but if a finite nonzero result of
1306 * division is exact, the result of fpatan is still inexact
1307 * (and underflowing where appropriate).
1308 */
1309 FloatX80RoundPrec save_prec =
1310 env->fp_status.floatx80_rounding_precision;
1311 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1312 ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1313 env->fp_status.floatx80_rounding_precision = save_prec;
1314 if (!floatx80_is_zero(ST1) &&
1315 !(get_float_exception_flags(&env->fp_status) &
1316 float_flag_inexact)) {
1317 /*
1318 * The mathematical result is very slightly closer to zero
1319 * than this exact result. Round a value with the
1320 * significand adjusted accordingly to get the correct
1321 * exceptions, and possibly an adjusted result depending
1322 * on the rounding mode.
1323 */
1324 uint64_t sig = extractFloatx80Frac(ST1);
1325 int32_t exp = extractFloatx80Exp(ST1);
1326 bool sign = extractFloatx80Sign(ST1);
1327 if (exp == 0) {
1328 normalizeFloatx80Subnormal(sig, &exp, &sig);
1329 }
1330 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1331 sign, exp, sig - 1,
1332 -1, &env->fp_status);
1333 }
1334 } else {
1335 /* The result is inexact. */
/* The result always takes the sign of ST1. */
1336 bool rsign = arg1_sign;
1337 int32_t rexp;
1338 uint64_t rsig0, rsig1;
1339 if (floatx80_is_zero(ST1)) {
1340 /*
1341 * ST0 is negative. The result is pi with the sign of
1342 * ST1.
1343 */
1344 rexp = pi_exp;
1345 rsig0 = pi_sig_high;
1346 rsig1 = pi_sig_low;
1347 } else if (floatx80_is_infinity(ST1)) {
1348 if (floatx80_is_infinity(ST0)) {
1349 if (arg0_sign) {
1350 rexp = pi_34_exp;
1351 rsig0 = pi_34_sig_high;
1352 rsig1 = pi_34_sig_low;
1353 } else {
1354 rexp = pi_4_exp;
1355 rsig0 = pi_4_sig_high;
1356 rsig1 = pi_4_sig_low;
1357 }
1358 } else {
1359 rexp = pi_2_exp;
1360 rsig0 = pi_2_sig_high;
1361 rsig1 = pi_2_sig_low;
1362 }
1363 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1364 rexp = pi_2_exp;
1365 rsig0 = pi_2_sig_high;
1366 rsig1 = pi_2_sig_low;
1367 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1368 /* ST0 is negative. */
1369 rexp = pi_exp;
1370 rsig0 = pi_sig_high;
1371 rsig1 = pi_sig_low;
1372 } else {
1373 /*
1374 * ST0 and ST1 are finite, nonzero and with exponents not
1375 * too far apart.
1376 */
1377 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1378 int32_t azexp, axexp;
1379 bool adj_sub, ysign, zsign;
1380 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1381 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1382 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1383 uint64_t azsig0, azsig1;
1384 uint64_t azsig2, azsig3, axsig0, axsig1;
1385 floatx80 x8;
/*
 * Intermediate floatx80 operations run in round-to-nearest at full
 * precision; the guest settings are restored before the final rounding.
 */
1386 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1387 FloatX80RoundPrec save_prec =
1388 env->fp_status.floatx80_rounding_precision;
1389 env->fp_status.float_rounding_mode = float_round_nearest_even;
1390 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1391
1392 if (arg0_exp == 0) {
1393 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1394 }
1395 if (arg1_exp == 0) {
1396 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1397 }
/*
 * Pick numerator and denominator so that the quotient is at most 1,
 * and record in adj_* how to map arctan(quotient) back to the result.
 * adj_exp == 0 means no adjustment.
 */
1398 if (arg0_exp > arg1_exp ||
1399 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1400 /* Work with abs(ST1) / abs(ST0). */
1401 num_exp = arg1_exp;
1402 num_sig = arg1_sig;
1403 den_exp = arg0_exp;
1404 den_sig = arg0_sig;
1405 if (arg0_sign) {
1406 /* The result is subtracted from pi. */
1407 adj_exp = pi_exp;
1408 adj_sig0 = pi_sig_high;
1409 adj_sig1 = pi_sig_low;
1410 adj_sub = true;
1411 } else {
1412 /* The result is used as-is. */
1413 adj_exp = 0;
1414 adj_sig0 = 0;
1415 adj_sig1 = 0;
1416 adj_sub = false;
1417 }
1418 } else {
1419 /* Work with abs(ST0) / abs(ST1). */
1420 num_exp = arg0_exp;
1421 num_sig = arg0_sig;
1422 den_exp = arg1_exp;
1423 den_sig = arg1_sig;
1424 /* The result is added to or subtracted from pi/2. */
1425 adj_exp = pi_2_exp;
1426 adj_sig0 = pi_2_sig_high;
1427 adj_sig1 = pi_2_sig_low;
1428 adj_sub = !arg0_sign;
1429 }
1430
1431 /*
1432 * Compute x = num/den, where 0 < x <= 1 and x is not too
1433 * small.
1434 */
1435 xexp = num_exp - den_exp + 0x3ffe;
1436 remsig0 = num_sig;
1437 remsig1 = 0;
1438 if (den_sig <= remsig0) {
1439 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1440 ++xexp;
1441 }
1442 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1443 mul64To128(den_sig, xsig0, &msig0, &msig1);
1444 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
/* Correct an over-estimated quotient digit until the remainder is >= 0. */
1445 while ((int64_t) remsig0 < 0) {
1446 --xsig0;
1447 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1448 }
1449 xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1450 /*
1451 * No need to correct any estimation error in xsig1; even
1452 * with such error, it is accurate enough.
1453 */
1454
1455 /*
1456 * Split x as x = t + y, where t = n/8 is the nearest
1457 * multiple of 1/8 to x.
1458 */
1459 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1460 false, xexp + 3, xsig0,
1461 xsig1, &env->fp_status);
1462 n = floatx80_to_int32(x8, &env->fp_status);
1463 if (n == 0) {
1464 ysign = false;
1465 yexp = xexp;
1466 ysig0 = xsig0;
1467 ysig1 = xsig1;
1468 texp = 0;
1469 tsig = 0;
1470 } else {
/* Build t = n/8 as a normalized 64-bit significand with exponent texp. */
1471 int shift = clz32(n) + 32;
1472 texp = 0x403b - shift;
1473 tsig = n;
1474 tsig <<= shift;
1475 if (texp == xexp) {
1476 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1477 if ((int64_t) ysig0 >= 0) {
1478 ysign = false;
1479 if (ysig0 == 0) {
1480 if (ysig1 == 0) {
1481 yexp = 0;
1482 } else {
1483 shift = clz64(ysig1) + 64;
1484 yexp = xexp - shift;
1485 shift128Left(ysig0, ysig1, shift,
1486 &ysig0, &ysig1);
1487 }
1488 } else {
1489 shift = clz64(ysig0);
1490 yexp = xexp - shift;
1491 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1492 }
1493 } else {
1494 ysign = true;
1495 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1496 if (ysig0 == 0) {
1497 shift = clz64(ysig1) + 64;
1498 } else {
1499 shift = clz64(ysig0);
1500 }
1501 yexp = xexp - shift;
1502 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1503 }
1504 } else {
1505 /*
1506 * t's exponent must be greater than x's because t
1507 * is positive and the nearest multiple of 1/8 to
1508 * x, and if x has a greater exponent, the power
1509 * of 2 with that exponent is also a multiple of
1510 * 1/8.
1511 */
1512 uint64_t usig0, usig1;
1513 shift128RightJamming(xsig0, xsig1, texp - xexp,
1514 &usig0, &usig1);
1515 ysign = true;
1516 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1517 if (ysig0 == 0) {
1518 shift = clz64(ysig1) + 64;
1519 } else {
1520 shift = clz64(ysig0);
1521 }
1522 yexp = texp - shift;
1523 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1524 }
1525 }
1526
1527 /*
1528 * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1529 * arctan(z).
1530 */
1531 zsign = ysign;
/* With t == 0 or y == 0 the division is unnecessary: z is just y. */
1532 if (texp == 0 || yexp == 0) {
1533 zexp = yexp;
1534 zsig0 = ysig0;
1535 zsig1 = ysig1;
1536 } else {
1537 /*
1538 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1539 */
1540 int32_t dexp = texp + xexp - 0x3ffe;
1541 uint64_t dsig0, dsig1, dsig2;
1542 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1543 /*
1544 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1545 * bit). Add 1 to produce the denominator 1+tx.
1546 */
1547 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1548 &dsig0, &dsig1);
1549 dsig0 |= 0x8000000000000000ULL;
1550 zexp = yexp - 1;
1551 remsig0 = ysig0;
1552 remsig1 = ysig1;
1553 remsig2 = 0;
1554 if (dsig0 <= remsig0) {
1555 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1556 ++zexp;
1557 }
1558 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1559 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1560 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1561 &remsig0, &remsig1, &remsig2);
1562 while ((int64_t) remsig0 < 0) {
1563 --zsig0;
1564 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1565 &remsig0, &remsig1, &remsig2);
1566 }
1567 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1568 /* No need to correct any estimation error in zsig1. */
1569 }
1570
/* zexp == 0 marks z == 0, in which case arctan(z) is zero too. */
1571 if (zexp == 0) {
1572 azexp = 0;
1573 azsig0 = 0;
1574 azsig1 = 0;
1575 } else {
1576 floatx80 z2, accum;
1577 uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1578 /* Compute z^2. */
1579 mul128To256(zsig0, zsig1, zsig0, zsig1,
1580 &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1581 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1582 zexp + zexp - 0x3ffe,
1583 z2sig0, z2sig1,
1584 &env->fp_status);
1585
1586 /* Compute the lower parts of the polynomial expansion. */
1587 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1588 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1589 accum = floatx80_mul(accum, z2, &env->fp_status);
1590 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1591 accum = floatx80_mul(accum, z2, &env->fp_status);
1592 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1593 accum = floatx80_mul(accum, z2, &env->fp_status);
1594 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1595 accum = floatx80_mul(accum, z2, &env->fp_status);
1596 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1597 accum = floatx80_mul(accum, z2, &env->fp_status);
1598
1599 /*
1600 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1601 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1602 */
1603 aexp = extractFloatx80Exp(fpatan_coeff_0);
1604 shift128RightJamming(extractFloatx80Frac(accum), 0,
1605 aexp - extractFloatx80Exp(accum),
1606 &asig0, &asig1);
1607 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1608 &asig0, &asig1);
1609 /* Multiply by z to compute arctan(z). */
1610 azexp = aexp + zexp - 0x3ffe;
1611 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1612 &azsig2, &azsig3);
1613 }
1614
1615 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
1616 if (texp == 0) {
1617 /* z is positive. */
1618 axexp = azexp;
1619 axsig0 = azsig0;
1620 axsig1 = azsig1;
1621 } else {
/* Rebuild arctan(t) as a 128-bit value from the table's high and low parts. */
1622 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1623 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1624 uint64_t low_sig0 =
1625 extractFloatx80Frac(fpatan_table[n].atan_low);
1626 uint64_t low_sig1 = 0;
1627 axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1628 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1629 axsig1 = 0;
1630 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1631 &low_sig0, &low_sig1);
1632 if (low_sign) {
1633 sub128(axsig0, axsig1, low_sig0, low_sig1,
1634 &axsig0, &axsig1);
1635 } else {
1636 add128(axsig0, axsig1, low_sig0, low_sig1,
1637 &axsig0, &axsig1);
1638 }
/*
 * Align both terms one position below the larger exponent plus one,
 * leaving a spare top bit for the addition or subtraction that follows.
 */
1639 if (azexp >= axexp) {
1640 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1641 &axsig0, &axsig1);
1642 axexp = azexp + 1;
1643 shift128RightJamming(azsig0, azsig1, 1,
1644 &azsig0, &azsig1);
1645 } else {
1646 shift128RightJamming(axsig0, axsig1, 1,
1647 &axsig0, &axsig1);
1648 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1649 &azsig0, &azsig1);
1650 ++axexp;
1651 }
1652 if (zsign) {
1653 sub128(axsig0, axsig1, azsig0, azsig1,
1654 &axsig0, &axsig1);
1655 } else {
1656 add128(axsig0, axsig1, azsig0, azsig1,
1657 &axsig0, &axsig1);
1658 }
1659 }
1660
1661 if (adj_exp == 0) {
1662 rexp = axexp;
1663 rsig0 = axsig0;
1664 rsig1 = axsig1;
1665 } else {
1666 /*
1667 * Add or subtract arctan(x) (exponent axexp,
1668 * significand axsig0 and axsig1, positive, not
1669 * necessarily normalized) to the number given by
1670 * adj_exp, adj_sig0 and adj_sig1, according to
1671 * adj_sub.
1672 */
1673 if (adj_exp >= axexp) {
1674 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1675 &axsig0, &axsig1);
1676 rexp = adj_exp + 1;
1677 shift128RightJamming(adj_sig0, adj_sig1, 1,
1678 &adj_sig0, &adj_sig1);
1679 } else {
1680 shift128RightJamming(axsig0, axsig1, 1,
1681 &axsig0, &axsig1);
1682 shift128RightJamming(adj_sig0, adj_sig1,
1683 axexp - adj_exp + 1,
1684 &adj_sig0, &adj_sig1);
1685 rexp = axexp + 1;
1686 }
1687 if (adj_sub) {
1688 sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1689 &rsig0, &rsig1);
1690 } else {
1691 add128(adj_sig0, adj_sig1, axsig0, axsig1,
1692 &rsig0, &rsig1);
1693 }
1694 }
1695
1696 env->fp_status.float_rounding_mode = save_mode;
1697 env->fp_status.floatx80_rounding_precision = save_prec;
1698 }
1699 /* This result is inexact. */
1700 rsig1 |= 1;
1701 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
1702 rsig0, rsig1, &env->fp_status);
1703 }
1704
/* The result has been written to ST1; now pop the register stack. */
1705 fpop(env);
1706 merge_exception_flags(env, old_flags);
1707 }
1708
1709 void helper_fxtract(CPUX86State *env)
1710 {
1711 uint8_t old_flags = save_exception_flags(env);
1712 CPU_LDoubleU temp;
1713
1714 temp.d = ST0;
1715
1716 if (floatx80_is_zero(ST0)) {
1717 /* Easy way to generate -inf and raising division by 0 exception */
1718 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1719 &env->fp_status);
1720 fpush(env);
1721 ST0 = temp.d;
1722 } else if (floatx80_invalid_encoding(ST0)) {
1723 float_raise(float_flag_invalid, &env->fp_status);
1724 ST0 = floatx80_default_nan(&env->fp_status);
1725 fpush(env);
1726 ST0 = ST1;
1727 } else if (floatx80_is_any_nan(ST0)) {
1728 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1729 float_raise(float_flag_invalid, &env->fp_status);
1730 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1731 }
1732 fpush(env);
1733 ST0 = ST1;
1734 } else if (floatx80_is_infinity(ST0)) {
1735 fpush(env);
1736 ST0 = ST1;
1737 ST1 = floatx80_infinity;
1738 } else {
1739 int expdif;
1740
1741 if (EXPD(temp) == 0) {
1742 int shift = clz64(temp.l.lower);
1743 temp.l.lower <<= shift;
1744 expdif = 1 - EXPBIAS - shift;
1745 float_raise(float_flag_input_denormal, &env->fp_status);
1746 } else {
1747 expdif = EXPD(temp) - EXPBIAS;
1748 }
1749 /* DP exponent bias */
1750 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1751 fpush(env);
1752 BIASEXPONENT(temp);
1753 ST0 = temp.d;
1754 }
1755 merge_exception_flags(env, old_flags);
1756 }
1757
1758 static void helper_fprem_common(CPUX86State *env, bool mod)
1759 {
1760 uint8_t old_flags = save_exception_flags(env);
1761 uint64_t quotient;
1762 CPU_LDoubleU temp0, temp1;
1763 int exp0, exp1, expdiff;
1764
1765 temp0.d = ST0;
1766 temp1.d = ST1;
1767 exp0 = EXPD(temp0);
1768 exp1 = EXPD(temp1);
1769
1770 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1771 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1772 exp0 == 0x7fff || exp1 == 0x7fff ||
1773 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1774 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1775 } else {
1776 if (exp0 == 0) {
1777 exp0 = 1 - clz64(temp0.l.lower);
1778 }
1779 if (exp1 == 0) {
1780 exp1 = 1 - clz64(temp1.l.lower);
1781 }
1782 expdiff = exp0 - exp1;
1783 if (expdiff < 64) {
1784 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1785 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
1786 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1787 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
1788 } else {
1789 /*
1790 * Partial remainder. This choice of how many bits to
1791 * process at once is specified in AMD instruction set
1792 * manuals, and empirically is followed by Intel
1793 * processors as well; it ensures that the final remainder
1794 * operation in a loop does produce the correct low three
1795 * bits of the quotient. AMD manuals specify that the
1796 * flags other than C2 are cleared, and empirically Intel
1797 * processors clear them as well.
1798 */
1799 int n = 32 + (expdiff % 32);
1800 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1801 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1802 env->fpus |= 0x400; /* C2 <-- 1 */
1803 }
1804 }
1805 merge_exception_flags(env, old_flags);
1806 }
1807
1808 void helper_fprem1(CPUX86State *env)
1809 {
1810 helper_fprem_common(env, false);
1811 }
1812
1813 void helper_fprem(CPUX86State *env)
1814 {
1815 helper_fprem_common(env, true);
1816 }
1817
1818 /* 128-bit significand of log2(e). */
/* Given as the high and low 64-bit halves of the significand. */
1819 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL
1820 #define log2_e_sig_low 0xbe87fed0691d3e89ULL
1821
1822 /*
1823 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
1824 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
1825 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
1826 * interval [sqrt(2)/2, sqrt(2)].
1827 */
/*
 * The leading coefficient is split into fyl2x_coeff_0 and a small
 * correction fyl2x_coeff_0_low.  helper_fyl2x_common evaluates
 * coefficients 9 down to 1 by Horner's rule in t^2, adds
 * fyl2x_coeff_0_low, and then combines the sum with fyl2x_coeff_0 in
 * 128-bit arithmetic before multiplying by t.
 */
1828 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
1829 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
1830 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
1831 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
1832 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
1833 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
1834 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
1835 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
1836 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
1837 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
1838 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1838 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1839
1840 /*
1841 * Compute an approximation of log2(1+arg), where 1+arg is in the
1842 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1843 * function is called, rounding precision is set to 80 and the
1844 * round-to-nearest mode is in effect. arg must not be exactly zero,
1845 * and must not be so close to zero that underflow might occur.
1846 */
/*
 * Parameters:
 *   env   CPU state; only env->fp_status is used, for the softfloat
 *         operations.
 *   arg   the value whose log2(1+arg) is wanted.
 *   exp   out: exponent of the result.
 *   sig0  out: high 64 bits of the result significand.
 *   sig1  out: low 64 bits of the result significand.
 *
 * Only the top 128 bits of the final 256-bit product are returned, and
 * no sign is returned.
 * NOTE(review): callers presumably derive the result sign from arg;
 * confirm at the call sites.
 */
1847 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1848 uint64_t *sig0, uint64_t *sig1)
1849 {
1850 uint64_t arg0_sig = extractFloatx80Frac(arg);
1851 int32_t arg0_exp = extractFloatx80Exp(arg);
1852 bool arg0_sign = extractFloatx80Sign(arg);
1853 bool asign;
1854 int32_t dexp, texp, aexp;
1855 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1856 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1857 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1858 floatx80 t2, accum;
1859
1860 /*
1861 * Compute an approximation of arg/(2+arg), with extra precision,
1862 * as the argument to a polynomial approximation. The extra
1863 * precision is only needed for the first term of the
1864 * approximation, with subsequent terms being significantly
1865 * smaller; the approximation only uses odd exponents, and the
1866 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1867 */
/*
 * Form the denominator 2+arg as a 128-bit significand (dsig0:dsig1)
 * with exponent dexp.  For negative arg the aligned magnitude is
 * subtracted (two's-complement negation); for positive arg the leading
 * bit representing 2 is OR-ed in.
 */
1868 if (arg0_sign) {
1869 dexp = 0x3fff;
1870 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1871 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1872 } else {
1873 dexp = 0x4000;
1874 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1875 dsig0 |= 0x8000000000000000ULL;
1876 }
/* Long division of |arg| by the denominator: t = tsig0:tsig1, exponent texp. */
1877 texp = arg0_exp - dexp + 0x3ffe;
1878 rsig0 = arg0_sig;
1879 rsig1 = 0;
1880 rsig2 = 0;
1881 if (dsig0 <= rsig0) {
1882 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1883 ++texp;
1884 }
1885 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1886 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1887 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1888 &rsig0, &rsig1, &rsig2);
/* Correct an over-estimated quotient digit until the remainder is >= 0. */
1889 while ((int64_t) rsig0 < 0) {
1890 --tsig0;
1891 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1892 &rsig0, &rsig1, &rsig2);
1893 }
1894 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1895 /*
1896 * No need to correct any estimation error in tsig1; even with
1897 * such error, it is accurate enough. Now compute the square of
1898 * that approximation.
1899 */
1900 mul128To256(tsig0, tsig1, tsig0, tsig1,
1901 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1902 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1903 texp + texp - 0x3ffe,
1904 t2sig0, t2sig1, &env->fp_status);
1905
1906 /* Compute the lower parts of the polynomial expansion. */
1907 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1908 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1909 accum = floatx80_mul(accum, t2, &env->fp_status);
1910 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1911 accum = floatx80_mul(accum, t2, &env->fp_status);
1912 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1913 accum = floatx80_mul(accum, t2, &env->fp_status);
1914 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1915 accum = floatx80_mul(accum, t2, &env->fp_status);
1916 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1917 accum = floatx80_mul(accum, t2, &env->fp_status);
1918 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1919 accum = floatx80_mul(accum, t2, &env->fp_status);
1920 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1921 accum = floatx80_mul(accum, t2, &env->fp_status);
1922 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1923 accum = floatx80_mul(accum, t2, &env->fp_status);
1924 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1925
1926 /*
1927 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1928 * accum has much lower magnitude, and so, in particular, carry
1929 * out of the addition is not possible), multiplied by t. (This
1930 * expansion is only accurate to about 70 bits, not 128 bits.)
1931 */
1932 aexp = extractFloatx80Exp(fyl2x_coeff_0);
1933 asign = extractFloatx80Sign(fyl2x_coeff_0);
1934 shift128RightJamming(extractFloatx80Frac(accum), 0,
1935 aexp - extractFloatx80Exp(accum),
1936 &asig0, &asig1);
1937 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1938 bsig1 = 0;
1939 if (asign == extractFloatx80Sign(accum)) {
1940 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1941 } else {
1942 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1943 }
1944 /* Multiply by t to compute the required result. */
1945 mul128To256(asig0, asig1, tsig0, tsig1,
1946 &asig0, &asig1, &asig2, &asig3);
1947 aexp += texp - 0x3ffe;
/* Hand back the exponent and the top 128 bits of the product. */
1948 *exp = aexp;
1949 *sig0 = asig0;
1950 *sig1 = asig1;
1951 }
1952
1953 void helper_fyl2xp1(CPUX86State *env)
1954 {
1955 uint8_t old_flags = save_exception_flags(env);
1956 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1957 int32_t arg0_exp = extractFloatx80Exp(ST0);
1958 bool arg0_sign = extractFloatx80Sign(ST0);
1959 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1960 int32_t arg1_exp = extractFloatx80Exp(ST1);
1961 bool arg1_sign = extractFloatx80Sign(ST1);
1962
1963 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1964 float_raise(float_flag_invalid, &env->fp_status);
1965 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1966 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1967 float_raise(float_flag_invalid, &env->fp_status);
1968 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1969 } else if (floatx80_invalid_encoding(ST0) ||
1970 floatx80_invalid_encoding(ST1)) {
1971 float_raise(float_flag_invalid, &env->fp_status);
1972 ST1 = floatx80_default_nan(&env->fp_status);
1973 } else if (floatx80_is_any_nan(ST0)) {
1974 ST1 = ST0;
1975 } else if (floatx80_is_any_nan(ST1)) {
1976 /* Pass this NaN through. */
1977 } else if (arg0_exp > 0x3ffd ||
1978 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
1979 0x95f619980c4336f7ULL :
1980 0xd413cccfe7799211ULL))) {
1981 /*
1982 * Out of range for the instruction (ST0 must have absolute
1983 * value less than 1 - sqrt(2)/2 = 0.292..., according to
1984 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
1985 * to sqrt(2) - 1, which we allow here), treat as invalid.
1986 */
1987 float_raise(float_flag_invalid, &env->fp_status);
1988 ST1 = floatx80_default_nan(&env->fp_status);
1989 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1990 arg1_exp == 0x7fff) {
1991 /*
1992 * One argument is zero, or multiplying by infinity; correct
1993 * result is exact and can be obtained by multiplying the
1994 * arguments.
1995 */
1996 ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
1997 } else if (arg0_exp < 0x3fb0) {
1998 /*
1999 * Multiplying both arguments and an extra-precision version
2000 * of log2(e) is sufficiently precise.
2001 */
2002 uint64_t sig0, sig1, sig2;
2003 int32_t exp;
2004 if (arg0_exp == 0) {
2005 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2006 }
2007 if (arg1_exp == 0) {
2008 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2009 }
2010 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2011 &sig0, &sig1, &sig2);
2012 exp = arg0_exp + 1;
2013 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2014 exp += arg1_exp - 0x3ffe;
2015 /* This result is inexact. */
2016 sig1 |= 1;
2017 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2018 arg0_sign ^ arg1_sign, exp,
2019 sig0, sig1, &env->fp_status);
2020 } else {
2021 int32_t aexp;
2022 uint64_t asig0, asig1, asig2;
2023 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2024 FloatX80RoundPrec save_prec =
2025 env->fp_status.floatx80_rounding_precision;
2026 env->fp_status.float_rounding_mode = float_round_nearest_even;
2027 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2028
2029 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2030 /*
2031 * Multiply by the second argument to compute the required
2032 * result.
2033 */
2034 if (arg1_exp == 0) {
2035 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2036 }
2037 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2038 aexp += arg1_exp - 0x3ffe;
2039 /* This result is inexact. */
2040 asig1 |= 1;
2041 env->fp_status.float_rounding_mode = save_mode;
2042 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2043 arg0_sign ^ arg1_sign, aexp,
2044 asig0, asig1, &env->fp_status);
2045 env->fp_status.floatx80_rounding_precision = save_prec;
2046 }
2047 fpop(env);
2048 merge_exception_flags(env, old_flags);
2049 }
2050
2051 void helper_fyl2x(CPUX86State *env)
2052 {
2053 uint8_t old_flags = save_exception_flags(env);
2054 uint64_t arg0_sig = extractFloatx80Frac(ST0);
2055 int32_t arg0_exp = extractFloatx80Exp(ST0);
2056 bool arg0_sign = extractFloatx80Sign(ST0);
2057 uint64_t arg1_sig = extractFloatx80Frac(ST1);
2058 int32_t arg1_exp = extractFloatx80Exp(ST1);
2059 bool arg1_sign = extractFloatx80Sign(ST1);
2060
2061 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2062 float_raise(float_flag_invalid, &env->fp_status);
2063 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2064 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2065 float_raise(float_flag_invalid, &env->fp_status);
2066 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2067 } else if (floatx80_invalid_encoding(ST0) ||
2068 floatx80_invalid_encoding(ST1)) {
2069 float_raise(float_flag_invalid, &env->fp_status);
2070 ST1 = floatx80_default_nan(&env->fp_status);
2071 } else if (floatx80_is_any_nan(ST0)) {
2072 ST1 = ST0;
2073 } else if (floatx80_is_any_nan(ST1)) {
2074 /* Pass this NaN through. */
2075 } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2076 float_raise(float_flag_invalid, &env->fp_status);
2077 ST1 = floatx80_default_nan(&env->fp_status);
2078 } else if (floatx80_is_infinity(ST1)) {
2079 FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2080 &env->fp_status);
2081 switch (cmp) {
2082 case float_relation_less:
2083 ST1 = floatx80_chs(ST1);
2084 break;
2085 case float_relation_greater:
2086 /* Result is infinity of the same sign as ST1. */
2087 break;
2088 default:
2089 float_raise(float_flag_invalid, &env->fp_status);
2090 ST1 = floatx80_default_nan(&env->fp_status);
2091 break;
2092 }
2093 } else if (floatx80_is_infinity(ST0)) {
2094 if (floatx80_is_zero(ST1)) {
2095 float_raise(float_flag_invalid, &env->fp_status);
2096 ST1 = floatx80_default_nan(&env->fp_status);
2097 } else if (arg1_sign) {
2098 ST1 = floatx80_chs(ST0);
2099 } else {
2100 ST1 = ST0;
2101 }
2102 } else if (floatx80_is_zero(ST0)) {
2103 if (floatx80_is_zero(ST1)) {
2104 float_raise(float_flag_invalid, &env->fp_status);
2105 ST1 = floatx80_default_nan(&env->fp_status);
2106 } else {
2107 /* Result is infinity with opposite sign to ST1. */
2108 float_raise(float_flag_divbyzero, &env->fp_status);
2109 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2110 0x8000000000000000ULL);
2111 }
2112 } else if (floatx80_is_zero(ST1)) {
2113 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2114 ST1 = floatx80_chs(ST1);
2115 }
2116 /* Otherwise, ST1 is already the correct result. */
2117 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2118 if (arg1_sign) {
2119 ST1 = floatx80_chs(floatx80_zero);
2120 } else {
2121 ST1 = floatx80_zero;
2122 }
2123 } else {
2124 int32_t int_exp;
2125 floatx80 arg0_m1;
2126 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2127 FloatX80RoundPrec save_prec =
2128 env->fp_status.floatx80_rounding_precision;
2129 env->fp_status.float_rounding_mode = float_round_nearest_even;
2130 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2131
2132 if (arg0_exp == 0) {
2133 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2134 }
2135 if (arg1_exp == 0) {
2136 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2137 }
2138 int_exp = arg0_exp - 0x3fff;
2139 if (arg0_sig > 0xb504f333f9de6484ULL) {
2140 ++int_exp;
2141 }
2142 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2143 &env->fp_status),
2144 floatx80_one, &env->fp_status);
2145 if (floatx80_is_zero(arg0_m1)) {
2146 /* Exact power of 2; multiply by ST1. */
2147 env->fp_status.float_rounding_mode = save_mode;
2148 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2149 ST1, &env->fp_status);
2150 } else {
2151 bool asign = extractFloatx80Sign(arg0_m1);
2152 int32_t aexp;
2153 uint64_t asig0, asig1, asig2;
2154 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2155 if (int_exp != 0) {
2156 bool isign = (int_exp < 0);
2157 int32_t iexp;
2158 uint64_t isig;
2159 int shift;
2160 int_exp = isign ? -int_exp : int_exp;
2161 shift = clz32(int_exp) + 32;
2162 isig = int_exp;
2163 isig <<= shift;
2164 iexp = 0x403e - shift;
2165 shift128RightJamming(asig0, asig1, iexp - aexp,
2166 &asig0, &asig1);
2167 if (asign == isign) {
2168 add128(isig, 0, asig0, asig1, &asig0, &asig1);
2169 } else {
2170 sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2171 }
2172 aexp = iexp;
2173 asign = isign;
2174 }
2175 /*
2176 * Multiply by the second argument to compute the required
2177 * result.
2178 */
2179 if (arg1_exp == 0) {
2180 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2181 }
2182 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2183 aexp += arg1_exp - 0x3ffe;
2184 /* This result is inexact. */
2185 asig1 |= 1;
2186 env->fp_status.float_rounding_mode = save_mode;
2187 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2188 asign ^ arg1_sign, aexp,
2189 asig0, asig1, &env->fp_status);
2190 }
2191
2192 env->fp_status.floatx80_rounding_precision = save_prec;
2193 }
2194 fpop(env);
2195 merge_exception_flags(env, old_flags);
2196 }
2197
2198 void helper_fsqrt(CPUX86State *env)
2199 {
2200 uint8_t old_flags = save_exception_flags(env);
2201 if (floatx80_is_neg(ST0)) {
2202 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2203 env->fpus |= 0x400;
2204 }
2205 ST0 = floatx80_sqrt(ST0, &env->fp_status);
2206 merge_exception_flags(env, old_flags);
2207 }
2208
2209 void helper_fsincos(CPUX86State *env)
2210 {
2211 double fptemp = floatx80_to_double(env, ST0);
2212
2213 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2214 env->fpus |= 0x400;
2215 } else {
2216 ST0 = double_to_floatx80(env, sin(fptemp));
2217 fpush(env);
2218 ST0 = double_to_floatx80(env, cos(fptemp));
2219 env->fpus &= ~0x400; /* C2 <-- 0 */
2220 /* the above code is for |arg| < 2**63 only */
2221 }
2222 }
2223
2224 void helper_frndint(CPUX86State *env)
2225 {
2226 uint8_t old_flags = save_exception_flags(env);
2227 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2228 merge_exception_flags(env, old_flags);
2229 }
2230
2231 void helper_fscale(CPUX86State *env)
2232 {
2233 uint8_t old_flags = save_exception_flags(env);
2234 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2235 float_raise(float_flag_invalid, &env->fp_status);
2236 ST0 = floatx80_default_nan(&env->fp_status);
2237 } else if (floatx80_is_any_nan(ST1)) {
2238 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2239 float_raise(float_flag_invalid, &env->fp_status);
2240 }
2241 ST0 = ST1;
2242 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2243 float_raise(float_flag_invalid, &env->fp_status);
2244 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2245 }
2246 } else if (floatx80_is_infinity(ST1) &&
2247 !floatx80_invalid_encoding(ST0) &&
2248 !floatx80_is_any_nan(ST0)) {
2249 if (floatx80_is_neg(ST1)) {
2250 if (floatx80_is_infinity(ST0)) {
2251 float_raise(float_flag_invalid, &env->fp_status);
2252 ST0 = floatx80_default_nan(&env->fp_status);
2253 } else {
2254 ST0 = (floatx80_is_neg(ST0) ?
2255 floatx80_chs(floatx80_zero) :
2256 floatx80_zero);
2257 }
2258 } else {
2259 if (floatx80_is_zero(ST0)) {
2260 float_raise(float_flag_invalid, &env->fp_status);
2261 ST0 = floatx80_default_nan(&env->fp_status);
2262 } else {
2263 ST0 = (floatx80_is_neg(ST0) ?
2264 floatx80_chs(floatx80_infinity) :
2265 floatx80_infinity);
2266 }
2267 }
2268 } else {
2269 int n;
2270 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
2271 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2272 set_float_exception_flags(0, &env->fp_status);
2273 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2274 set_float_exception_flags(save_flags, &env->fp_status);
2275 env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2276 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2277 env->fp_status.floatx80_rounding_precision = save;
2278 }
2279 merge_exception_flags(env, old_flags);
2280 }
2281
2282 void helper_fsin(CPUX86State *env)
2283 {
2284 double fptemp = floatx80_to_double(env, ST0);
2285
2286 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2287 env->fpus |= 0x400;
2288 } else {
2289 ST0 = double_to_floatx80(env, sin(fptemp));
2290 env->fpus &= ~0x400; /* C2 <-- 0 */
2291 /* the above code is for |arg| < 2**53 only */
2292 }
2293 }
2294
2295 void helper_fcos(CPUX86State *env)
2296 {
2297 double fptemp = floatx80_to_double(env, ST0);
2298
2299 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2300 env->fpus |= 0x400;
2301 } else {
2302 ST0 = double_to_floatx80(env, cos(fptemp));
2303 env->fpus &= ~0x400; /* C2 <-- 0 */
2304 /* the above code is for |arg| < 2**63 only */
2305 }
2306 }
2307
2308 void helper_fxam_ST0(CPUX86State *env)
2309 {
2310 CPU_LDoubleU temp;
2311 int expdif;
2312
2313 temp.d = ST0;
2314
2315 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2316 if (SIGND(temp)) {
2317 env->fpus |= 0x200; /* C1 <-- 1 */
2318 }
2319
2320 if (env->fptags[env->fpstt]) {
2321 env->fpus |= 0x4100; /* Empty */
2322 return;
2323 }
2324
2325 expdif = EXPD(temp);
2326 if (expdif == MAXEXPD) {
2327 if (MANTD(temp) == 0x8000000000000000ULL) {
2328 env->fpus |= 0x500; /* Infinity */
2329 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2330 env->fpus |= 0x100; /* NaN */
2331 }
2332 } else if (expdif == 0) {
2333 if (MANTD(temp) == 0) {
2334 env->fpus |= 0x4000; /* Zero */
2335 } else {
2336 env->fpus |= 0x4400; /* Denormal */
2337 }
2338 } else if (MANTD(temp) & 0x8000000000000000ULL) {
2339 env->fpus |= 0x400;
2340 }
2341 }
2342
2343 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2344 uintptr_t retaddr)
2345 {
2346 int fpus, fptag, exp, i;
2347 uint64_t mant;
2348 CPU_LDoubleU tmp;
2349
2350 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2351 fptag = 0;
2352 for (i = 7; i >= 0; i--) {
2353 fptag <<= 2;
2354 if (env->fptags[i]) {
2355 fptag |= 3;
2356 } else {
2357 tmp.d = env->fpregs[i].d;
2358 exp = EXPD(tmp);
2359 mant = MANTD(tmp);
2360 if (exp == 0 && mant == 0) {
2361 /* zero */
2362 fptag |= 1;
2363 } else if (exp == 0 || exp == MAXEXPD
2364 || (mant & (1LL << 63)) == 0) {
2365 /* NaNs, infinity, denormal */
2366 fptag |= 2;
2367 }
2368 }
2369 }
2370 if (data32) {
2371 /* 32 bit */
2372 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2373 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2374 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2375 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
2376 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
2377 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
2378 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
2379 } else {
2380 /* 16 bit */
2381 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2382 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2383 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2384 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
2385 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
2386 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
2387 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
2388 }
2389 }
2390
2391 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2392 {
2393 do_fstenv(env, ptr, data32, GETPC());
2394 }
2395
2396 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2397 {
2398 env->fpstt = (fpus >> 11) & 7;
2399 env->fpus = fpus & ~0x3800 & ~FPUS_B;
2400 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2401 #if !defined(CONFIG_USER_ONLY)
2402 if (!(env->fpus & FPUS_SE)) {
2403 /*
2404 * Here the processor deasserts FERR#; in response, the chipset deasserts
2405 * IGNNE#.
2406 */
2407 cpu_clear_ignne();
2408 }
2409 #endif
2410 }
2411
2412 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2413 uintptr_t retaddr)
2414 {
2415 int i, fpus, fptag;
2416
2417 if (data32) {
2418 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2419 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2420 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2421 } else {
2422 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2423 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2424 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2425 }
2426 cpu_set_fpus(env, fpus);
2427 for (i = 0; i < 8; i++) {
2428 env->fptags[i] = ((fptag & 3) == 3);
2429 fptag >>= 2;
2430 }
2431 }
2432
2433 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2434 {
2435 do_fldenv(env, ptr, data32, GETPC());
2436 }
2437
2438 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
2439 uintptr_t retaddr)
2440 {
2441 floatx80 tmp;
2442 int i;
2443
2444 do_fstenv(env, ptr, data32, retaddr);
2445
2446 ptr += (14 << data32);
2447 for (i = 0; i < 8; i++) {
2448 tmp = ST(i);
2449 do_fstt(env, tmp, ptr, retaddr);
2450 ptr += 10;
2451 }
2452
2453 /* fninit */
2454 env->fpus = 0;
2455 env->fpstt = 0;
2456 cpu_set_fpuc(env, 0x37f);
2457 env->fptags[0] = 1;
2458 env->fptags[1] = 1;
2459 env->fptags[2] = 1;
2460 env->fptags[3] = 1;
2461 env->fptags[4] = 1;
2462 env->fptags[5] = 1;
2463 env->fptags[6] = 1;
2464 env->fptags[7] = 1;
2465 }
2466
2467 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2468 {
2469 do_fsave(env, ptr, data32, GETPC());
2470 }
2471
2472 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
2473 uintptr_t retaddr)
2474 {
2475 floatx80 tmp;
2476 int i;
2477
2478 do_fldenv(env, ptr, data32, retaddr);
2479 ptr += (14 << data32);
2480
2481 for (i = 0; i < 8; i++) {
2482 tmp = do_fldt(env, ptr, retaddr);
2483 ST(i) = tmp;
2484 ptr += 10;
2485 }
2486 }
2487
2488 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2489 {
2490 do_frstor(env, ptr, data32, GETPC());
2491 }
2492
#ifdef CONFIG_USER_ONLY
/*
 * Entry points for user-mode emulation code.  They are not invoked as
 * TCG helpers, so 0 is passed as the return address instead of GETPC().
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    const uintptr_t ra = 0;

    do_fsave(env, ptr, data32, ra);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    const uintptr_t ra = 0;

    do_frstor(env, ptr, data32, ra);
}
#endif /* CONFIG_USER_ONLY */
2504
/* Byte offset of @field within the XSAVE area layout. */
#define XO(field) offsetof(X86XSaveArea, field)
2506
2507 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2508 {
2509 int fpus, fptag, i;
2510 target_ulong addr;
2511
2512 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2513 fptag = 0;
2514 for (i = 0; i < 8; i++) {
2515 fptag |= (env->fptags[i] << i);
2516 }
2517
2518 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2519 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2520 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2521
2522 /* In 32-bit mode this is eip, sel, dp, sel.
2523 In 64-bit mode this is rip, rdp.
2524 But in either case we don't write actual data, just zeros. */
2525 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2526 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2527
2528 addr = ptr + XO(legacy.fpregs);
2529 for (i = 0; i < 8; i++) {
2530 floatx80 tmp = ST(i);
2531 do_fstt(env, tmp, addr, ra);
2532 addr += 16;
2533 }
2534 }
2535
2536 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2537 {
2538 update_mxcsr_from_sse_status(env);
2539 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2540 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2541 }
2542
2543 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2544 {
2545 int i, nb_xmm_regs;
2546 target_ulong addr;
2547
2548 if (env->hflags & HF_CS64_MASK) {
2549 nb_xmm_regs = 16;
2550 } else {
2551 nb_xmm_regs = 8;
2552 }
2553
2554 addr = ptr + XO(legacy.xmm_regs);
2555 for (i = 0; i < nb_xmm_regs; i++) {
2556 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2557 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2558 addr += 16;
2559 }
2560 }
2561
2562 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2563 {
2564 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2565 int i;
2566
2567 for (i = 0; i < 4; i++, addr += 16) {
2568 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2569 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2570 }
2571 }
2572
2573 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2574 {
2575 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2576 env->bndcs_regs.cfgu, ra);
2577 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2578 env->bndcs_regs.sts, ra);
2579 }
2580
2581 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2582 {
2583 cpu_stq_data_ra(env, ptr, env->pkru, ra);
2584 }
2585
2586 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2587 {
2588 /* The operand must be 16 byte aligned */
2589 if (ptr & 0xf) {
2590 raise_exception_ra(env, EXCP0D_GPF, ra);
2591 }
2592
2593 do_xsave_fpu(env, ptr, ra);
2594
2595 if (env->cr[4] & CR4_OSFXSR_MASK) {
2596 do_xsave_mxcsr(env, ptr, ra);
2597 /* Fast FXSAVE leaves out the XMM registers */
2598 if (!(env->efer & MSR_EFER_FFXSR)
2599 || (env->hflags & HF_CPL_MASK)
2600 || !(env->hflags & HF_LMA_MASK)) {
2601 do_xsave_sse(env, ptr, ra);
2602 }
2603 }
2604 }
2605
2606 void helper_fxsave(CPUX86State *env, target_ulong ptr)
2607 {
2608 do_fxsave(env, ptr, GETPC());
2609 }
2610
2611 static uint64_t get_xinuse(CPUX86State *env)
2612 {
2613 uint64_t inuse = -1;
2614
2615 /* For the most part, we don't track XINUSE. We could calculate it
2616 here for all components, but it's probably less work to simply
2617 indicate in use. That said, the state of BNDREGS is important
2618 enough to track in HFLAGS, so we might as well use that here. */
2619 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2620 inuse &= ~XSTATE_BNDREGS_MASK;
2621 }
2622 return inuse;
2623 }
2624
2625 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2626 uint64_t inuse, uint64_t opt, uintptr_t ra)
2627 {
2628 uint64_t old_bv, new_bv;
2629
2630 /* The OS must have enabled XSAVE. */
2631 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2632 raise_exception_ra(env, EXCP06_ILLOP, ra);
2633 }
2634
2635 /* The operand must be 64 byte aligned. */
2636 if (ptr & 63) {
2637 raise_exception_ra(env, EXCP0D_GPF, ra);
2638 }
2639
2640 /* Never save anything not enabled by XCR0. */
2641 rfbm &= env->xcr0;
2642 opt &= rfbm;
2643
2644 if (opt & XSTATE_FP_MASK) {
2645 do_xsave_fpu(env, ptr, ra);
2646 }
2647 if (rfbm & XSTATE_SSE_MASK) {
2648 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2649 do_xsave_mxcsr(env, ptr, ra);
2650 }
2651 if (opt & XSTATE_SSE_MASK) {
2652 do_xsave_sse(env, ptr, ra);
2653 }
2654 if (opt & XSTATE_BNDREGS_MASK) {
2655 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2656 }
2657 if (opt & XSTATE_BNDCSR_MASK) {
2658 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2659 }
2660 if (opt & XSTATE_PKRU_MASK) {
2661 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2662 }
2663
2664 /* Update the XSTATE_BV field. */
2665 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2666 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2667 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2668 }
2669
2670 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2671 {
2672 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2673 }
2674
2675 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2676 {
2677 uint64_t inuse = get_xinuse(env);
2678 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2679 }
2680
2681 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2682 {
2683 int i, fpuc, fpus, fptag;
2684 target_ulong addr;
2685
2686 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2687 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2688 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2689 cpu_set_fpuc(env, fpuc);
2690 cpu_set_fpus(env, fpus);
2691 fptag ^= 0xff;
2692 for (i = 0; i < 8; i++) {
2693 env->fptags[i] = ((fptag >> i) & 1);
2694 }
2695
2696 addr = ptr + XO(legacy.fpregs);
2697 for (i = 0; i < 8; i++) {
2698 floatx80 tmp = do_fldt(env, addr, ra);
2699 ST(i) = tmp;
2700 addr += 16;
2701 }
2702 }
2703
2704 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2705 {
2706 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2707 }
2708
2709 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2710 {
2711 int i, nb_xmm_regs;
2712 target_ulong addr;
2713
2714 if (env->hflags & HF_CS64_MASK) {
2715 nb_xmm_regs = 16;
2716 } else {
2717 nb_xmm_regs = 8;
2718 }
2719
2720 addr = ptr + XO(legacy.xmm_regs);
2721 for (i = 0; i < nb_xmm_regs; i++) {
2722 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2723 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2724 addr += 16;
2725 }
2726 }
2727
2728 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2729 {
2730 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2731 int i;
2732
2733 for (i = 0; i < 4; i++, addr += 16) {
2734 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2735 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2736 }
2737 }
2738
2739 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2740 {
2741 /* FIXME: Extend highest implemented bit of linear address. */
2742 env->bndcs_regs.cfgu
2743 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2744 env->bndcs_regs.sts
2745 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2746 }
2747
2748 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2749 {
2750 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2751 }
2752
2753 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2754 {
2755 /* The operand must be 16 byte aligned */
2756 if (ptr & 0xf) {
2757 raise_exception_ra(env, EXCP0D_GPF, ra);
2758 }
2759
2760 do_xrstor_fpu(env, ptr, ra);
2761
2762 if (env->cr[4] & CR4_OSFXSR_MASK) {
2763 do_xrstor_mxcsr(env, ptr, ra);
2764 /* Fast FXRSTOR leaves out the XMM registers */
2765 if (!(env->efer & MSR_EFER_FFXSR)
2766 || (env->hflags & HF_CPL_MASK)
2767 || !(env->hflags & HF_LMA_MASK)) {
2768 do_xrstor_sse(env, ptr, ra);
2769 }
2770 }
2771 }
2772
2773 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2774 {
2775 do_fxrstor(env, ptr, GETPC());
2776 }
2777
#ifdef CONFIG_USER_ONLY
/*
 * Entry points for user-mode emulation code.  They are not invoked as
 * TCG helpers, so 0 is passed as the return address instead of GETPC().
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t ra = 0;

    do_fxsave(env, ptr, ra);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t ra = 0;

    do_fxrstor(env, ptr, ra);
}
#endif /* CONFIG_USER_ONLY */
2789
2790 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2791 {
2792 uintptr_t ra = GETPC();
2793 uint64_t xstate_bv, xcomp_bv, reserve0;
2794
2795 rfbm &= env->xcr0;
2796
2797 /* The OS must have enabled XSAVE. */
2798 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2799 raise_exception_ra(env, EXCP06_ILLOP, ra);
2800 }
2801
2802 /* The operand must be 64 byte aligned. */
2803 if (ptr & 63) {
2804 raise_exception_ra(env, EXCP0D_GPF, ra);
2805 }
2806
2807 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2808
2809 if ((int64_t)xstate_bv < 0) {
2810 /* FIXME: Compact form. */
2811 raise_exception_ra(env, EXCP0D_GPF, ra);
2812 }
2813
2814 /* Standard form. */
2815
2816 /* The XSTATE_BV field must not set bits not present in XCR0. */
2817 if (xstate_bv & ~env->xcr0) {
2818 raise_exception_ra(env, EXCP0D_GPF, ra);
2819 }
2820
2821 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2822 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2823 describes only XCOMP_BV, but the description of the standard form
2824 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2825 includes the next 64-bit field. */
2826 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2827 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2828 if (xcomp_bv || reserve0) {
2829 raise_exception_ra(env, EXCP0D_GPF, ra);
2830 }
2831
2832 if (rfbm & XSTATE_FP_MASK) {
2833 if (xstate_bv & XSTATE_FP_MASK) {
2834 do_xrstor_fpu(env, ptr, ra);
2835 } else {
2836 helper_fninit(env);
2837 memset(env->fpregs, 0, sizeof(env->fpregs));
2838 }
2839 }
2840 if (rfbm & XSTATE_SSE_MASK) {
2841 /* Note that the standard form of XRSTOR loads MXCSR from memory
2842 whether or not the XSTATE_BV bit is set. */
2843 do_xrstor_mxcsr(env, ptr, ra);
2844 if (xstate_bv & XSTATE_SSE_MASK) {
2845 do_xrstor_sse(env, ptr, ra);
2846 } else {
2847 /* ??? When AVX is implemented, we may have to be more
2848 selective in the clearing. */
2849 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2850 }
2851 }
2852 if (rfbm & XSTATE_BNDREGS_MASK) {
2853 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2854 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2855 env->hflags |= HF_MPX_IU_MASK;
2856 } else {
2857 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2858 env->hflags &= ~HF_MPX_IU_MASK;
2859 }
2860 }
2861 if (rfbm & XSTATE_BNDCSR_MASK) {
2862 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2863 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2864 } else {
2865 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2866 }
2867 cpu_sync_bndcs_hflags(env);
2868 }
2869 if (rfbm & XSTATE_PKRU_MASK) {
2870 uint64_t old_pkru = env->pkru;
2871 if (xstate_bv & XSTATE_PKRU_MASK) {
2872 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2873 } else {
2874 env->pkru = 0;
2875 }
2876 if (env->pkru != old_pkru) {
2877 CPUState *cs = env_cpu(env);
2878 tlb_flush(cs);
2879 }
2880 }
2881 }
2882
2883 #undef XO
2884
2885 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2886 {
2887 /* The OS must have enabled XSAVE. */
2888 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2889 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2890 }
2891
2892 switch (ecx) {
2893 case 0:
2894 return env->xcr0;
2895 case 1:
2896 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2897 return env->xcr0 & get_xinuse(env);
2898 }
2899 break;
2900 }
2901 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2902 }
2903
2904 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2905 {
2906 uint32_t dummy, ena_lo, ena_hi;
2907 uint64_t ena;
2908
2909 /* The OS must have enabled XSAVE. */
2910 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2911 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2912 }
2913
2914 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2915 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2916 goto do_gpf;
2917 }
2918
2919 /* Disallow enabling unimplemented features. */
2920 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2921 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2922 if (mask & ~ena) {
2923 goto do_gpf;
2924 }
2925
2926 /* Disallow enabling only half of MPX. */
2927 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2928 & XSTATE_BNDCSR_MASK) {
2929 goto do_gpf;
2930 }
2931
2932 env->xcr0 = mask;
2933 cpu_sync_bndcs_hflags(env);
2934 return;
2935
2936 do_gpf:
2937 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2938 }
2939
2940 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2942
/*
 * MXCSR control bits decoded by update_mxcsr_status():
 * SSE_DAZ drives set_flush_inputs_to_zero(), SSE_FZ drives
 * set_flush_to_zero(), and the SSE_RC_* values select the softfloat
 * rounding mode after masking with SSE_RC_MASK.
 */
#define SSE_DAZ 0x0040 /* denormals are zero */
#define SSE_RC_MASK 0x6000 /* rounding-control field */
#define SSE_RC_NEAR 0x0000 /* float_round_nearest_even */
#define SSE_RC_DOWN 0x2000 /* float_round_down */
#define SSE_RC_UP 0x4000 /* float_round_up */
#define SSE_RC_CHOP 0x6000 /* float_round_to_zero */
#define SSE_FZ 0x8000 /* flush to zero */
2950
2951 void update_mxcsr_status(CPUX86State *env)
2952 {
2953 uint32_t mxcsr = env->mxcsr;
2954 int rnd_type;
2955
2956 /* set rounding mode */
2957 switch (mxcsr & SSE_RC_MASK) {
2958 default:
2959 case SSE_RC_NEAR:
2960 rnd_type = float_round_nearest_even;
2961 break;
2962 case SSE_RC_DOWN:
2963 rnd_type = float_round_down;
2964 break;
2965 case SSE_RC_UP:
2966 rnd_type = float_round_up;
2967 break;
2968 case SSE_RC_CHOP:
2969 rnd_type = float_round_to_zero;
2970 break;
2971 }
2972 set_float_rounding_mode(rnd_type, &env->sse_status);
2973
2974 /* Set exception flags. */
2975 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
2976 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
2977 (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
2978 (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
2979 (mxcsr & FPUS_PE ? float_flag_inexact : 0),
2980 &env->sse_status);
2981
2982 /* set denormals are zero */
2983 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2984
2985 /* set flush to zero */
2986 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2987 }
2988
2989 void update_mxcsr_from_sse_status(CPUX86State *env)
2990 {
2991 uint8_t flags = get_float_exception_flags(&env->sse_status);
2992 /*
2993 * The MXCSR denormal flag has opposite semantics to
2994 * float_flag_input_denormal (the softfloat code sets that flag
2995 * only when flushing input denormals to zero, but SSE sets it
2996 * only when not flushing them to zero), so is not converted
2997 * here.
2998 */
2999 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
3000 (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3001 (flags & float_flag_overflow ? FPUS_OE : 0) |
3002 (flags & float_flag_underflow ? FPUS_UE : 0) |
3003 (flags & float_flag_inexact ? FPUS_PE : 0) |
3004 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3005 0));
3006 }
3007
3008 void helper_update_mxcsr(CPUX86State *env)
3009 {
3010 update_mxcsr_from_sse_status(env);
3011 }
3012
3013 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
3014 {
3015 cpu_set_mxcsr(env, val);
3016 }
3017
3018 void helper_enter_mmx(CPUX86State *env)
3019 {
3020 env->fpstt = 0;
3021 *(uint32_t *)(env->fptags) = 0;
3022 *(uint32_t *)(env->fptags + 4) = 0;
3023 }
3024
3025 void helper_emms(CPUX86State *env)
3026 {
3027 /* set to empty state */
3028 *(uint32_t *)(env->fptags) = 0x01010101;
3029 *(uint32_t *)(env->fptags + 4) = 0x01010101;
3030 }
3031
3032 /* XXX: suppress */
3033 void helper_movq(CPUX86State *env, void *d, void *s)
3034 {
3035 *(uint64_t *)d = *(uint64_t *)s;
3036 }
3037
3038 #define SHIFT 0
3039 #include "ops_sse.h"
3040
3041 #define SHIFT 1
3042 #include "ops_sse.h"