target/i386/translate.c
1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31
32 #include "trace-tcg.h"
33 #include "exec/log.h"
34
35 #define PREFIX_REPZ 0x01
36 #define PREFIX_REPNZ 0x02
37 #define PREFIX_LOCK 0x04
38 #define PREFIX_DATA 0x08
39 #define PREFIX_ADR 0x10
40 #define PREFIX_VEX 0x20
41
42 #ifdef TARGET_X86_64
43 #define CODE64(s) ((s)->code64)
44 #define REX_X(s) ((s)->rex_x)
45 #define REX_B(s) ((s)->rex_b)
46 #else
47 #define CODE64(s) 0
48 #define REX_X(s) 0
49 #define REX_B(s) 0
50 #endif
51
52 #ifdef TARGET_X86_64
53 # define ctztl ctz64
54 # define clztl clz64
55 #else
56 # define ctztl ctz32
57 # define clztl clz32
58 #endif
59
60 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
61 #define CASE_MODRM_MEM_OP(OP) \
62 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
63 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
64 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
65
66 #define CASE_MODRM_OP(OP) \
67 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
68 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
69 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
70 case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
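/* For instance, CASE_MODRM_MEM_OP(4) matches ModRM bytes 0x20-0x27,
   0x60-0x67 and 0xA0-0xA7 (mod = 0..2, reg field = 4, any rm), i.e. all
   memory forms; CASE_MODRM_OP(4) additionally matches the register
   forms 0xE0-0xE7 (mod = 3). */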
71
72 //#define MACRO_TEST 1
73
74 /* global register indexes */
75 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
76 static TCGv_i32 cpu_cc_op;
77 static TCGv cpu_regs[CPU_NB_REGS];
78 static TCGv cpu_seg_base[6];
79 static TCGv_i64 cpu_bndl[4];
80 static TCGv_i64 cpu_bndu[4];
81
82 #include "exec/gen-icount.h"
83
84 typedef struct DisasContext {
85 DisasContextBase base;
86
87 /* current insn context */
88 int override; /* -1 if no override */
89 int prefix;
90 MemOp aflag;
91 MemOp dflag;
92 target_ulong pc_start;
93 target_ulong pc; /* pc = eip + cs_base */
94 /* current block context */
95 target_ulong cs_base; /* base of CS segment */
96 int pe; /* protected mode */
97 int code32; /* 32 bit code segment */
98 #ifdef TARGET_X86_64
99 int lma; /* long mode active */
100 int code64; /* 64 bit code segment */
101 int rex_x, rex_b;
102 #endif
103 int vex_l; /* vex vector length */
104 int vex_v; /* vex vvvv register, without 1's complement. */
105 int ss32; /* 32 bit stack segment */
106 CCOp cc_op; /* current CC operation */
107 bool cc_op_dirty;
108 #ifdef TARGET_X86_64
109 bool x86_64_hregs;
110 #endif
111 int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
112 int f_st; /* currently unused */
113 int vm86; /* vm86 mode */
114 int cpl;
115 int iopl;
116 int tf; /* TF cpu flag */
117 int jmp_opt; /* use direct block chaining for direct jumps */
118 int repz_opt; /* optimize jumps within repz instructions */
119 int mem_index; /* select memory access functions */
120 uint64_t flags; /* all execution flags */
121 int popl_esp_hack; /* for correct popl with esp base handling */
122 int rip_offset; /* only used in x86_64, but left for simplicity */
123 int cpuid_features;
124 int cpuid_ext_features;
125 int cpuid_ext2_features;
126 int cpuid_ext3_features;
127 int cpuid_7_0_ebx_features;
128 int cpuid_xsave_features;
129
130 /* TCG local temps */
131 TCGv cc_srcT;
132 TCGv A0;
133 TCGv T0;
134 TCGv T1;
135
136 /* TCG local register indexes (only used inside old micro ops) */
137 TCGv tmp0;
138 TCGv tmp4;
139 TCGv_ptr ptr0;
140 TCGv_ptr ptr1;
141 TCGv_i32 tmp2_i32;
142 TCGv_i32 tmp3_i32;
143 TCGv_i64 tmp1_i64;
144
145 sigjmp_buf jmpbuf;
146 } DisasContext;
147
148 static void gen_eob(DisasContext *s);
149 static void gen_jr(DisasContext *s, TCGv dest);
150 static void gen_jmp(DisasContext *s, target_ulong eip);
151 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
152 static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
153
154 /* i386 arith/logic operations */
155 enum {
156 OP_ADDL,
157 OP_ORL,
158 OP_ADCL,
159 OP_SBBL,
160 OP_ANDL,
161 OP_SUBL,
162 OP_XORL,
163 OP_CMPL,
164 };
165
166 /* i386 shift ops */
167 enum {
168 OP_ROL,
169 OP_ROR,
170 OP_RCL,
171 OP_RCR,
172 OP_SHL,
173 OP_SHR,
174 OP_SHL1, /* undocumented */
175 OP_SAR = 7,
176 };
177
178 enum {
179 JCC_O,
180 JCC_B,
181 JCC_Z,
182 JCC_BE,
183 JCC_S,
184 JCC_P,
185 JCC_L,
186 JCC_LE,
187 };
188
189 enum {
190 /* I386 int registers */
191 OR_EAX, /* MUST be even numbered */
192 OR_ECX,
193 OR_EDX,
194 OR_EBX,
195 OR_ESP,
196 OR_EBP,
197 OR_ESI,
198 OR_EDI,
199
200 OR_TMP0 = 16, /* temporary operand register */
201 OR_TMP1,
202 OR_A0, /* temporary register used when doing address evaluation */
203 };
204
205 enum {
206 USES_CC_DST = 1,
207 USES_CC_SRC = 2,
208 USES_CC_SRC2 = 4,
209 USES_CC_SRCT = 8,
210 };
211
212 /* Bit set if the global variable is live after setting CC_OP to X. */
213 static const uint8_t cc_op_live[CC_OP_NB] = {
214 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
215 [CC_OP_EFLAGS] = USES_CC_SRC,
216 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
217 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
218 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
219 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
220 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
221 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
222 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
223 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
224 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
225 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
226 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
227 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
228 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
229 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
230 [CC_OP_CLR] = 0,
231 [CC_OP_POPCNT] = USES_CC_SRC,
232 };
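/* For instance, switching from CC_OP_SUBB (DST|SRC|SRCT live) to
   CC_OP_LOGICB (only DST live) lets set_cc_op() below discard
   cpu_cc_src and s->cc_srcT, so TCG's liveness analysis can drop the
   code that computed them. */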
233
234 static void set_cc_op(DisasContext *s, CCOp op)
235 {
236 int dead;
237
238 if (s->cc_op == op) {
239 return;
240 }
241
242 /* Discard CC computation that will no longer be used. */
243 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
244 if (dead & USES_CC_DST) {
245 tcg_gen_discard_tl(cpu_cc_dst);
246 }
247 if (dead & USES_CC_SRC) {
248 tcg_gen_discard_tl(cpu_cc_src);
249 }
250 if (dead & USES_CC_SRC2) {
251 tcg_gen_discard_tl(cpu_cc_src2);
252 }
253 if (dead & USES_CC_SRCT) {
254 tcg_gen_discard_tl(s->cc_srcT);
255 }
256
257 if (op == CC_OP_DYNAMIC) {
258 /* The DYNAMIC setting is translator only, and should never be
259 stored. Thus we always consider it clean. */
260 s->cc_op_dirty = false;
261 } else {
262 /* Discard any computed CC_OP value (see shifts). */
263 if (s->cc_op == CC_OP_DYNAMIC) {
264 tcg_gen_discard_i32(cpu_cc_op);
265 }
266 s->cc_op_dirty = true;
267 }
268 s->cc_op = op;
269 }
270
271 static void gen_update_cc_op(DisasContext *s)
272 {
273 if (s->cc_op_dirty) {
274 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
275 s->cc_op_dirty = false;
276 }
277 }
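/* Note that set_cc_op() only updates the translation-time s->cc_op and
   marks it dirty; the cpu_cc_op global is written back lazily by
   gen_update_cc_op(), which callers invoke before anything that might
   observe the flags state outside this TB (helpers, exceptions, jumps). */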
278
279 #ifdef TARGET_X86_64
280
281 #define NB_OP_SIZES 4
282
283 #else /* !TARGET_X86_64 */
284
285 #define NB_OP_SIZES 3
286
287 #endif /* !TARGET_X86_64 */
288
289 #if defined(HOST_WORDS_BIGENDIAN)
290 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
291 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
292 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
293 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
294 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
295 #else
296 #define REG_B_OFFSET 0
297 #define REG_H_OFFSET 1
298 #define REG_W_OFFSET 0
299 #define REG_L_OFFSET 0
300 #define REG_LH_OFFSET 4
301 #endif
302
303 /* In instruction encodings for byte register accesses the
304 * register number usually indicates "low 8 bits of register N";
305 * however there are some special cases where N 4..7 indicates
306 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
307 * true for this special case, false otherwise.
308 */
309 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
310 {
311 if (reg < 4) {
312 return false;
313 }
314 #ifdef TARGET_X86_64
315 if (reg >= 8 || s->x86_64_hregs) {
316 return false;
317 }
318 #endif
319 return true;
320 }
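/* Example: with no REX prefix, an encoding naming byte register 4 means
   AH (bits 15..8 of EAX) and this returns true.  With any REX prefix
   present (s->x86_64_hregs), register 4 instead names SPL, the low byte
   of RSP, and this returns false. */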
321
322 /* Select the size of a push/pop operation. */
323 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
324 {
325 if (CODE64(s)) {
326 return ot == MO_16 ? MO_16 : MO_64;
327 } else {
328 return ot;
329 }
330 }
331
332 /* Select the size of the stack pointer. */
333 static inline MemOp mo_stacksize(DisasContext *s)
334 {
335 return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
336 }
337
338 /* Select only size 64 else 32. Used for SSE operand sizes. */
339 static inline MemOp mo_64_32(MemOp ot)
340 {
341 #ifdef TARGET_X86_64
342 return ot == MO_64 ? MO_64 : MO_32;
343 #else
344 return MO_32;
345 #endif
346 }
347
348 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
349 byte vs word opcodes. */
350 static inline MemOp mo_b_d(int b, MemOp ot)
351 {
352 return b & 1 ? ot : MO_8;
353 }
354
355 /* Select size 8 if lsb of B is clear, else OT capped at 32.
356 Used for decoding operand size of port opcodes. */
357 static inline MemOp mo_b_d32(int b, MemOp ot)
358 {
359 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
360 }
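/* Most x86 ALU/MOV opcodes come in byte/full-size pairs distinguished
   by bit 0 of the opcode, e.g. 0x88 (MOV Eb,Gb) vs 0x89 (MOV Ev,Gv);
   mo_b_d implements exactly that selection.  IN/OUT never transfer more
   than 32 bits at a time, hence the capped variant mo_b_d32. */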
361
362 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
363 {
364 switch(ot) {
365 case MO_8:
366 if (!byte_reg_is_xH(s, reg)) {
367 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
368 } else {
369 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
370 }
371 break;
372 case MO_16:
373 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
374 break;
375 case MO_32:
376 /* For x86_64, this sets the higher half of the register to zero.
377 For i386, this is equivalent to a mov. */
378 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
379 break;
380 #ifdef TARGET_X86_64
381 case MO_64:
382 tcg_gen_mov_tl(cpu_regs[reg], t0);
383 break;
384 #endif
385 default:
386 tcg_abort();
387 }
388 }
389
390 static inline
391 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
392 {
393 if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
394 tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
395 } else {
396 tcg_gen_mov_tl(t0, cpu_regs[reg]);
397 }
398 }
399
400 static void gen_add_A0_im(DisasContext *s, int val)
401 {
402 tcg_gen_addi_tl(s->A0, s->A0, val);
403 if (!CODE64(s)) {
404 tcg_gen_ext32u_tl(s->A0, s->A0);
405 }
406 }
407
408 static inline void gen_op_jmp_v(TCGv dest)
409 {
410 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
411 }
412
413 static inline
414 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
415 {
416 tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
417 gen_op_mov_reg_v(s, size, reg, s->tmp0);
418 }
419
420 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
421 {
422 tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
423 gen_op_mov_reg_v(s, size, reg, s->tmp0);
424 }
425
426 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
427 {
428 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
429 }
430
431 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
432 {
433 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
434 }
435
436 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
437 {
438 if (d == OR_TMP0) {
439 gen_op_st_v(s, idx, s->T0, s->A0);
440 } else {
441 gen_op_mov_reg_v(s, idx, d, s->T0);
442 }
443 }
444
445 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
446 {
447 tcg_gen_movi_tl(s->tmp0, pc);
448 gen_op_jmp_v(s->tmp0);
449 }
450
451 /* Compute SEG:REG into A0. SEG is selected from the override segment
452 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
453 indicate no override. */
454 static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
455 int def_seg, int ovr_seg)
456 {
457 switch (aflag) {
458 #ifdef TARGET_X86_64
459 case MO_64:
460 if (ovr_seg < 0) {
461 tcg_gen_mov_tl(s->A0, a0);
462 return;
463 }
464 break;
465 #endif
466 case MO_32:
467 /* 32 bit address */
468 if (ovr_seg < 0 && s->addseg) {
469 ovr_seg = def_seg;
470 }
471 if (ovr_seg < 0) {
472 tcg_gen_ext32u_tl(s->A0, a0);
473 return;
474 }
475 break;
476 case MO_16:
477 /* 16 bit address */
478 tcg_gen_ext16u_tl(s->A0, a0);
479 a0 = s->A0;
480 if (ovr_seg < 0) {
481 if (s->addseg) {
482 ovr_seg = def_seg;
483 } else {
484 return;
485 }
486 }
487 break;
488 default:
489 tcg_abort();
490 }
491
492 if (ovr_seg >= 0) {
493 TCGv seg = cpu_seg_base[ovr_seg];
494
495 if (aflag == MO_64) {
496 tcg_gen_add_tl(s->A0, a0, seg);
497 } else if (CODE64(s)) {
498 tcg_gen_ext32u_tl(s->A0, a0);
499 tcg_gen_add_tl(s->A0, s->A0, seg);
500 } else {
501 tcg_gen_add_tl(s->A0, a0, seg);
502 tcg_gen_ext32u_tl(s->A0, s->A0);
503 }
504 }
505 }
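/* Roughly: a 16-bit offset wraps at 64K before the segment base is
   added, and outside 64-bit code the final linear address is truncated
   to 32 bits.  When there is no override and no segment has a non-zero
   base (s->addseg clear), the segment-base addition is skipped entirely
   for 16/32-bit addressing. */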
506
507 static inline void gen_string_movl_A0_ESI(DisasContext *s)
508 {
509 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
510 }
511
512 static inline void gen_string_movl_A0_EDI(DisasContext *s)
513 {
514 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
515 }
516
517 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
518 {
519 tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
520 tcg_gen_shli_tl(s->T0, s->T0, ot);
521 };
522
523 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
524 {
525 switch (size) {
526 case MO_8:
527 if (sign) {
528 tcg_gen_ext8s_tl(dst, src);
529 } else {
530 tcg_gen_ext8u_tl(dst, src);
531 }
532 return dst;
533 case MO_16:
534 if (sign) {
535 tcg_gen_ext16s_tl(dst, src);
536 } else {
537 tcg_gen_ext16u_tl(dst, src);
538 }
539 return dst;
540 #ifdef TARGET_X86_64
541 case MO_32:
542 if (sign) {
543 tcg_gen_ext32s_tl(dst, src);
544 } else {
545 tcg_gen_ext32u_tl(dst, src);
546 }
547 return dst;
548 #endif
549 default:
550 return src;
551 }
552 }
553
554 static void gen_extu(MemOp ot, TCGv reg)
555 {
556 gen_ext_tl(reg, reg, ot, false);
557 }
558
559 static void gen_exts(MemOp ot, TCGv reg)
560 {
561 gen_ext_tl(reg, reg, ot, true);
562 }
563
564 static inline
565 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
566 {
567 tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
568 gen_extu(size, s->tmp0);
569 tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
570 }
571
572 static inline
573 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
574 {
575 tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
576 gen_extu(size, s->tmp0);
577 tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
578 }
579
580 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
581 {
582 switch (ot) {
583 case MO_8:
584 gen_helper_inb(v, cpu_env, n);
585 break;
586 case MO_16:
587 gen_helper_inw(v, cpu_env, n);
588 break;
589 case MO_32:
590 gen_helper_inl(v, cpu_env, n);
591 break;
592 default:
593 tcg_abort();
594 }
595 }
596
597 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
598 {
599 switch (ot) {
600 case MO_8:
601 gen_helper_outb(cpu_env, v, n);
602 break;
603 case MO_16:
604 gen_helper_outw(cpu_env, v, n);
605 break;
606 case MO_32:
607 gen_helper_outl(cpu_env, v, n);
608 break;
609 default:
610 tcg_abort();
611 }
612 }
613
614 static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
615 uint32_t svm_flags)
616 {
617 target_ulong next_eip;
618
619 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
620 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
621 switch (ot) {
622 case MO_8:
623 gen_helper_check_iob(cpu_env, s->tmp2_i32);
624 break;
625 case MO_16:
626 gen_helper_check_iow(cpu_env, s->tmp2_i32);
627 break;
628 case MO_32:
629 gen_helper_check_iol(cpu_env, s->tmp2_i32);
630 break;
631 default:
632 tcg_abort();
633 }
634 }
635 if(s->flags & HF_GUEST_MASK) {
636 gen_update_cc_op(s);
637 gen_jmp_im(s, cur_eip);
638 svm_flags |= (1 << (4 + ot));
639 next_eip = s->pc - s->cs_base;
640 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
641 gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
642 tcg_const_i32(svm_flags),
643 tcg_const_i32(next_eip - cur_eip));
644 }
645 }
646
647 static inline void gen_movs(DisasContext *s, MemOp ot)
648 {
649 gen_string_movl_A0_ESI(s);
650 gen_op_ld_v(s, ot, s->T0, s->A0);
651 gen_string_movl_A0_EDI(s);
652 gen_op_st_v(s, ot, s->T0, s->A0);
653 gen_op_movl_T0_Dshift(s, ot);
654 gen_op_add_reg_T0(s, s->aflag, R_ESI);
655 gen_op_add_reg_T0(s, s->aflag, R_EDI);
656 }
657
658 static void gen_op_update1_cc(DisasContext *s)
659 {
660 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
661 }
662
663 static void gen_op_update2_cc(DisasContext *s)
664 {
665 tcg_gen_mov_tl(cpu_cc_src, s->T1);
666 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
667 }
668
669 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
670 {
671 tcg_gen_mov_tl(cpu_cc_src2, reg);
672 tcg_gen_mov_tl(cpu_cc_src, s->T1);
673 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
674 }
675
676 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
677 {
678 tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
679 }
680
681 static void gen_op_update_neg_cc(DisasContext *s)
682 {
683 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
684 tcg_gen_neg_tl(cpu_cc_src, s->T0);
685 tcg_gen_movi_tl(s->cc_srcT, 0);
686 }
687
688 /* compute all eflags to cc_src */
689 static void gen_compute_eflags(DisasContext *s)
690 {
691 TCGv zero, dst, src1, src2;
692 int live, dead;
693
694 if (s->cc_op == CC_OP_EFLAGS) {
695 return;
696 }
697 if (s->cc_op == CC_OP_CLR) {
698 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
699 set_cc_op(s, CC_OP_EFLAGS);
700 return;
701 }
702
703 zero = NULL;
704 dst = cpu_cc_dst;
705 src1 = cpu_cc_src;
706 src2 = cpu_cc_src2;
707
708 /* Take care to not read values that are not live. */
709 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
710 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
711 if (dead) {
712 zero = tcg_const_tl(0);
713 if (dead & USES_CC_DST) {
714 dst = zero;
715 }
716 if (dead & USES_CC_SRC) {
717 src1 = zero;
718 }
719 if (dead & USES_CC_SRC2) {
720 src2 = zero;
721 }
722 }
723
724 gen_update_cc_op(s);
725 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
726 set_cc_op(s, CC_OP_EFLAGS);
727
728 if (dead) {
729 tcg_temp_free(zero);
730 }
731 }
732
733 typedef struct CCPrepare {
734 TCGCond cond;
735 TCGv reg;
736 TCGv reg2;
737 target_ulong imm;
738 target_ulong mask;
739 bool use_reg2;
740 bool no_setcond;
741 } CCPrepare;
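/* A CCPrepare describes a comparison to be materialized later: cond is
   applied to reg (pre-masked with mask unless mask == -1) against reg2
   if use_reg2, else against imm.  no_setcond means reg already holds
   the final value and at most an inversion is needed (see gen_setcc1). */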
742
743 /* compute eflags.C to reg */
744 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
745 {
746 TCGv t0, t1;
747 int size, shift;
748
749 switch (s->cc_op) {
750 case CC_OP_SUBB ... CC_OP_SUBQ:
751 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
752 size = s->cc_op - CC_OP_SUBB;
753 t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
754 /* If no temporary was used, be careful not to alias t1 and t0. */
755 t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
756 tcg_gen_mov_tl(t0, s->cc_srcT);
757 gen_extu(size, t0);
758 goto add_sub;
759
760 case CC_OP_ADDB ... CC_OP_ADDQ:
761 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
762 size = s->cc_op - CC_OP_ADDB;
763 t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
764 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
765 add_sub:
766 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
767 .reg2 = t1, .mask = -1, .use_reg2 = true };
768
769 case CC_OP_LOGICB ... CC_OP_LOGICQ:
770 case CC_OP_CLR:
771 case CC_OP_POPCNT:
772 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
773
774 case CC_OP_INCB ... CC_OP_INCQ:
775 case CC_OP_DECB ... CC_OP_DECQ:
776 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
777 .mask = -1, .no_setcond = true };
778
779 case CC_OP_SHLB ... CC_OP_SHLQ:
780 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
781 size = s->cc_op - CC_OP_SHLB;
782 shift = (8 << size) - 1;
783 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
784 .mask = (target_ulong)1 << shift };
785
786 case CC_OP_MULB ... CC_OP_MULQ:
787 return (CCPrepare) { .cond = TCG_COND_NE,
788 .reg = cpu_cc_src, .mask = -1 };
789
790 case CC_OP_BMILGB ... CC_OP_BMILGQ:
791 size = s->cc_op - CC_OP_BMILGB;
792 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
793 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
794
795 case CC_OP_ADCX:
796 case CC_OP_ADCOX:
797 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
798 .mask = -1, .no_setcond = true };
799
800 case CC_OP_EFLAGS:
801 case CC_OP_SARB ... CC_OP_SARQ:
802 /* CC_SRC & 1 */
803 return (CCPrepare) { .cond = TCG_COND_NE,
804 .reg = cpu_cc_src, .mask = CC_C };
805
806 default:
807 /* The need to compute only C from CC_OP_DYNAMIC is important
808 in efficiently implementing e.g. INC at the start of a TB. */
809 gen_update_cc_op(s);
810 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
811 cpu_cc_src2, cpu_cc_op);
812 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
813 .mask = -1, .no_setcond = true };
814 }
815 }
816
817 /* compute eflags.P to reg */
818 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
819 {
820 gen_compute_eflags(s);
821 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
822 .mask = CC_P };
823 }
824
825 /* compute eflags.S to reg */
826 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
827 {
828 switch (s->cc_op) {
829 case CC_OP_DYNAMIC:
830 gen_compute_eflags(s);
831 /* FALLTHRU */
832 case CC_OP_EFLAGS:
833 case CC_OP_ADCX:
834 case CC_OP_ADOX:
835 case CC_OP_ADCOX:
836 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
837 .mask = CC_S };
838 case CC_OP_CLR:
839 case CC_OP_POPCNT:
840 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
841 default:
842 {
843 MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
844 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
845 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
846 }
847 }
848 }
849
850 /* compute eflags.O to reg */
851 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
852 {
853 switch (s->cc_op) {
854 case CC_OP_ADOX:
855 case CC_OP_ADCOX:
856 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
857 .mask = -1, .no_setcond = true };
858 case CC_OP_CLR:
859 case CC_OP_POPCNT:
860 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
861 default:
862 gen_compute_eflags(s);
863 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
864 .mask = CC_O };
865 }
866 }
867
868 /* compute eflags.Z to reg */
869 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
870 {
871 switch (s->cc_op) {
872 case CC_OP_DYNAMIC:
873 gen_compute_eflags(s);
874 /* FALLTHRU */
875 case CC_OP_EFLAGS:
876 case CC_OP_ADCX:
877 case CC_OP_ADOX:
878 case CC_OP_ADCOX:
879 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
880 .mask = CC_Z };
881 case CC_OP_CLR:
882 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
883 case CC_OP_POPCNT:
884 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
885 .mask = -1 };
886 default:
887 {
888 MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
889 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
890 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
891 }
892 }
893 }
894
895 /* perform a conditional store into register 'reg' according to jump opcode
896 value 'b'. In the fast case, T0 is guaranteed not to be used. */
897 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
898 {
899 int inv, jcc_op, cond;
900 MemOp size;
901 CCPrepare cc;
902 TCGv t0;
903
904 inv = b & 1;
905 jcc_op = (b >> 1) & 7;
906
907 switch (s->cc_op) {
908 case CC_OP_SUBB ... CC_OP_SUBQ:
909 /* We optimize relational operators for the cmp/jcc case. */
910 size = s->cc_op - CC_OP_SUBB;
911 switch (jcc_op) {
912 case JCC_BE:
913 tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
914 gen_extu(size, s->tmp4);
915 t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
916 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
917 .reg2 = t0, .mask = -1, .use_reg2 = true };
918 break;
919
920 case JCC_L:
921 cond = TCG_COND_LT;
922 goto fast_jcc_l;
923 case JCC_LE:
924 cond = TCG_COND_LE;
925 fast_jcc_l:
926 tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
927 gen_exts(size, s->tmp4);
928 t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
929 cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
930 .reg2 = t0, .mask = -1, .use_reg2 = true };
931 break;
932
933 default:
934 goto slow_jcc;
935 }
936 break;
937
938 default:
939 slow_jcc:
940 /* This actually generates good code for JC, JZ and JS. */
941 switch (jcc_op) {
942 case JCC_O:
943 cc = gen_prepare_eflags_o(s, reg);
944 break;
945 case JCC_B:
946 cc = gen_prepare_eflags_c(s, reg);
947 break;
948 case JCC_Z:
949 cc = gen_prepare_eflags_z(s, reg);
950 break;
951 case JCC_BE:
952 gen_compute_eflags(s);
953 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
954 .mask = CC_Z | CC_C };
955 break;
956 case JCC_S:
957 cc = gen_prepare_eflags_s(s, reg);
958 break;
959 case JCC_P:
960 cc = gen_prepare_eflags_p(s, reg);
961 break;
962 case JCC_L:
963 gen_compute_eflags(s);
964 if (reg == cpu_cc_src) {
965 reg = s->tmp0;
966 }
967 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
968 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
969 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
970 .mask = CC_S };
971 break;
972 default:
973 case JCC_LE:
974 gen_compute_eflags(s);
975 if (reg == cpu_cc_src) {
976 reg = s->tmp0;
977 }
978 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
979 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
980 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
981 .mask = CC_S | CC_Z };
982 break;
983 }
984 break;
985 }
986
987 if (inv) {
988 cc.cond = tcg_invert_cond(cc.cond);
989 }
990 return cc;
991 }
992
993 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
994 {
995 CCPrepare cc = gen_prepare_cc(s, b, reg);
996
997 if (cc.no_setcond) {
998 if (cc.cond == TCG_COND_EQ) {
999 tcg_gen_xori_tl(reg, cc.reg, 1);
1000 } else {
1001 tcg_gen_mov_tl(reg, cc.reg);
1002 }
1003 return;
1004 }
1005
1006 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1007 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1008 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1009 tcg_gen_andi_tl(reg, reg, 1);
1010 return;
1011 }
1012 if (cc.mask != -1) {
1013 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1014 cc.reg = reg;
1015 }
1016 if (cc.use_reg2) {
1017 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1018 } else {
1019 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1020 }
1021 }
1022
1023 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1024 {
1025 gen_setcc1(s, JCC_B << 1, reg);
1026 }
1027
1028 /* generate a conditional jump to label 'l1' according to jump opcode
1029 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1030 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1031 {
1032 CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1033
1034 if (cc.mask != -1) {
1035 tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1036 cc.reg = s->T0;
1037 }
1038 if (cc.use_reg2) {
1039 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1040 } else {
1041 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1042 }
1043 }
1044
1045 /* Generate a conditional jump to label 'l1' according to jump opcode
1046 value 'b'. In the fast case, T0 is guaranteed not to be used.
1047 A translation block must end soon. */
1048 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1049 {
1050 CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1051
1052 gen_update_cc_op(s);
1053 if (cc.mask != -1) {
1054 tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1055 cc.reg = s->T0;
1056 }
1057 set_cc_op(s, CC_OP_DYNAMIC);
1058 if (cc.use_reg2) {
1059 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1060 } else {
1061 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1062 }
1063 }
1064
1065 /* XXX: does not work with gdbstub "ice" single step - not a
1066 serious problem */
1067 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1068 {
1069 TCGLabel *l1 = gen_new_label();
1070 TCGLabel *l2 = gen_new_label();
1071 gen_op_jnz_ecx(s, s->aflag, l1);
1072 gen_set_label(l2);
1073 gen_jmp_tb(s, next_eip, 1);
1074 gen_set_label(l1);
1075 return l2;
1076 }
1077
1078 static inline void gen_stos(DisasContext *s, MemOp ot)
1079 {
1080 gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1081 gen_string_movl_A0_EDI(s);
1082 gen_op_st_v(s, ot, s->T0, s->A0);
1083 gen_op_movl_T0_Dshift(s, ot);
1084 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1085 }
1086
1087 static inline void gen_lods(DisasContext *s, MemOp ot)
1088 {
1089 gen_string_movl_A0_ESI(s);
1090 gen_op_ld_v(s, ot, s->T0, s->A0);
1091 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1092 gen_op_movl_T0_Dshift(s, ot);
1093 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1094 }
1095
1096 static inline void gen_scas(DisasContext *s, MemOp ot)
1097 {
1098 gen_string_movl_A0_EDI(s);
1099 gen_op_ld_v(s, ot, s->T1, s->A0);
1100 gen_op(s, OP_CMPL, ot, R_EAX);
1101 gen_op_movl_T0_Dshift(s, ot);
1102 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1103 }
1104
1105 static inline void gen_cmps(DisasContext *s, MemOp ot)
1106 {
1107 gen_string_movl_A0_EDI(s);
1108 gen_op_ld_v(s, ot, s->T1, s->A0);
1109 gen_string_movl_A0_ESI(s);
1110 gen_op(s, OP_CMPL, ot, OR_TMP0);
1111 gen_op_movl_T0_Dshift(s, ot);
1112 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1113 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1114 }
1115
1116 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1117 {
1118 if (s->flags & HF_IOBPT_MASK) {
1119 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1120 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1121
1122 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1123 tcg_temp_free_i32(t_size);
1124 tcg_temp_free(t_next);
1125 }
1126 }
1127
1128
1129 static inline void gen_ins(DisasContext *s, MemOp ot)
1130 {
1131 gen_string_movl_A0_EDI(s);
1132 /* Note: we must do this dummy write first to be restartable in
1133 case of page fault. */
1134 tcg_gen_movi_tl(s->T0, 0);
1135 gen_op_st_v(s, ot, s->T0, s->A0);
1136 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1137 tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1138 gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1139 gen_op_st_v(s, ot, s->T0, s->A0);
1140 gen_op_movl_T0_Dshift(s, ot);
1141 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1142 gen_bpt_io(s, s->tmp2_i32, ot);
1143 }
1144
1145 static inline void gen_outs(DisasContext *s, MemOp ot)
1146 {
1147 gen_string_movl_A0_ESI(s);
1148 gen_op_ld_v(s, ot, s->T0, s->A0);
1149
1150 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1151 tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1152 tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1153 gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1154 gen_op_movl_T0_Dshift(s, ot);
1155 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1156 gen_bpt_io(s, s->tmp2_i32, ot);
1157 }
1158
1159 /* same method as Valgrind: we generate jumps to the current or next
1160 instruction */
1161 #define GEN_REPZ(op) \
1162 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \
1163 target_ulong cur_eip, target_ulong next_eip) \
1164 { \
1165 TCGLabel *l2; \
1166 gen_update_cc_op(s); \
1167 l2 = gen_jz_ecx_string(s, next_eip); \
1168 gen_ ## op(s, ot); \
1169 gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \
1170 /* a loop would cause two single step exceptions if ECX = 1 \
1171 before rep string_insn */ \
1172 if (s->repz_opt) \
1173 gen_op_jz_ecx(s, s->aflag, l2); \
1174 gen_jmp(s, cur_eip); \
1175 }
1176
1177 #define GEN_REPZ2(op) \
1178 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \
1179 target_ulong cur_eip, \
1180 target_ulong next_eip, \
1181 int nz) \
1182 { \
1183 TCGLabel *l2; \
1184 gen_update_cc_op(s); \
1185 l2 = gen_jz_ecx_string(s, next_eip); \
1186 gen_ ## op(s, ot); \
1187 gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \
1188 gen_update_cc_op(s); \
1189 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1190 if (s->repz_opt) \
1191 gen_op_jz_ecx(s, s->aflag, l2); \
1192 gen_jmp(s, cur_eip); \
1193 }
1194
1195 GEN_REPZ(movs)
1196 GEN_REPZ(stos)
1197 GEN_REPZ(lods)
1198 GEN_REPZ(ins)
1199 GEN_REPZ(outs)
1200 GEN_REPZ2(scas)
1201 GEN_REPZ2(cmps)
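/* Each GEN_REPZ*(op) above expands to a gen_repz_<op>() that emits one
   iteration of the string operation: skip to the next insn if ECX is
   already zero, perform the op, decrement ECX, optionally test the
   REPZ/REPNZ condition (GEN_REPZ2 only), then jump back to the current
   insn so the next iteration runs as a fresh TB execution. */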
1202
1203 static void gen_helper_fp_arith_ST0_FT0(int op)
1204 {
1205 switch (op) {
1206 case 0:
1207 gen_helper_fadd_ST0_FT0(cpu_env);
1208 break;
1209 case 1:
1210 gen_helper_fmul_ST0_FT0(cpu_env);
1211 break;
1212 case 2:
1213 gen_helper_fcom_ST0_FT0(cpu_env);
1214 break;
1215 case 3:
1216 gen_helper_fcom_ST0_FT0(cpu_env);
1217 break;
1218 case 4:
1219 gen_helper_fsub_ST0_FT0(cpu_env);
1220 break;
1221 case 5:
1222 gen_helper_fsubr_ST0_FT0(cpu_env);
1223 break;
1224 case 6:
1225 gen_helper_fdiv_ST0_FT0(cpu_env);
1226 break;
1227 case 7:
1228 gen_helper_fdivr_ST0_FT0(cpu_env);
1229 break;
1230 }
1231 }
1232
1233 /* NOTE the exception in "r" op ordering */
1234 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1235 {
1236 TCGv_i32 tmp = tcg_const_i32(opreg);
1237 switch (op) {
1238 case 0:
1239 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1240 break;
1241 case 1:
1242 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1243 break;
1244 case 4:
1245 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1246 break;
1247 case 5:
1248 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1249 break;
1250 case 6:
1251 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1252 break;
1253 case 7:
1254 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1255 break;
1256 }
1257 }
1258
1259 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1260 {
1261 gen_update_cc_op(s);
1262 gen_jmp_im(s, cur_eip);
1263 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1264 s->base.is_jmp = DISAS_NORETURN;
1265 }
1266
1267 /* Generate #UD for the current instruction. The assumption here is that
1268 the instruction is known, but it isn't allowed in the current cpu mode. */
1269 static void gen_illegal_opcode(DisasContext *s)
1270 {
1271 gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1272 }
1273
1274 /* if d == OR_TMP0, it means memory operand (address in A0) */
1275 static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
1276 {
1277 if (d != OR_TMP0) {
1278 if (s1->prefix & PREFIX_LOCK) {
1279 /* Lock prefix when destination is not memory. */
1280 gen_illegal_opcode(s1);
1281 return;
1282 }
1283 gen_op_mov_v_reg(s1, ot, s1->T0, d);
1284 } else if (!(s1->prefix & PREFIX_LOCK)) {
1285 gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1286 }
1287 switch(op) {
1288 case OP_ADCL:
1289 gen_compute_eflags_c(s1, s1->tmp4);
1290 if (s1->prefix & PREFIX_LOCK) {
1291 tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
1292 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1293 s1->mem_index, ot | MO_LE);
1294 } else {
1295 tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1296 tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
1297 gen_op_st_rm_T0_A0(s1, ot, d);
1298 }
1299 gen_op_update3_cc(s1, s1->tmp4);
1300 set_cc_op(s1, CC_OP_ADCB + ot);
1301 break;
1302 case OP_SBBL:
1303 gen_compute_eflags_c(s1, s1->tmp4);
1304 if (s1->prefix & PREFIX_LOCK) {
1305 tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
1306 tcg_gen_neg_tl(s1->T0, s1->T0);
1307 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1308 s1->mem_index, ot | MO_LE);
1309 } else {
1310 tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1311 tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
1312 gen_op_st_rm_T0_A0(s1, ot, d);
1313 }
1314 gen_op_update3_cc(s1, s1->tmp4);
1315 set_cc_op(s1, CC_OP_SBBB + ot);
1316 break;
1317 case OP_ADDL:
1318 if (s1->prefix & PREFIX_LOCK) {
1319 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
1320 s1->mem_index, ot | MO_LE);
1321 } else {
1322 tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1323 gen_op_st_rm_T0_A0(s1, ot, d);
1324 }
1325 gen_op_update2_cc(s1);
1326 set_cc_op(s1, CC_OP_ADDB + ot);
1327 break;
1328 case OP_SUBL:
1329 if (s1->prefix & PREFIX_LOCK) {
1330 tcg_gen_neg_tl(s1->T0, s1->T1);
1331 tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
1332 s1->mem_index, ot | MO_LE);
1333 tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
1334 } else {
1335 tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1336 tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1337 gen_op_st_rm_T0_A0(s1, ot, d);
1338 }
1339 gen_op_update2_cc(s1);
1340 set_cc_op(s1, CC_OP_SUBB + ot);
1341 break;
1342 default:
1343 case OP_ANDL:
1344 if (s1->prefix & PREFIX_LOCK) {
1345 tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
1346 s1->mem_index, ot | MO_LE);
1347 } else {
1348 tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
1349 gen_op_st_rm_T0_A0(s1, ot, d);
1350 }
1351 gen_op_update1_cc(s1);
1352 set_cc_op(s1, CC_OP_LOGICB + ot);
1353 break;
1354 case OP_ORL:
1355 if (s1->prefix & PREFIX_LOCK) {
1356 tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
1357 s1->mem_index, ot | MO_LE);
1358 } else {
1359 tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
1360 gen_op_st_rm_T0_A0(s1, ot, d);
1361 }
1362 gen_op_update1_cc(s1);
1363 set_cc_op(s1, CC_OP_LOGICB + ot);
1364 break;
1365 case OP_XORL:
1366 if (s1->prefix & PREFIX_LOCK) {
1367 tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
1368 s1->mem_index, ot | MO_LE);
1369 } else {
1370 tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
1371 gen_op_st_rm_T0_A0(s1, ot, d);
1372 }
1373 gen_op_update1_cc(s1);
1374 set_cc_op(s1, CC_OP_LOGICB + ot);
1375 break;
1376 case OP_CMPL:
1377 tcg_gen_mov_tl(cpu_cc_src, s1->T1);
1378 tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1379 tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
1380 set_cc_op(s1, CC_OP_SUBB + ot);
1381 break;
1382 }
1383 }
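/* With a LOCK prefix the read-modify-write above is done with a single
   atomic TCG operation on the memory operand; ADC folds the carry into
   the addend first, and SBB/SUB add the negated operand so the same
   atomic add can be used (SUB recovers the old value via fetch_add so
   that cc_srcT stays correct). */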
1384
1385 /* if d == OR_TMP0, it means memory operand (address in A0) */
1386 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1387 {
1388 if (s1->prefix & PREFIX_LOCK) {
1389 if (d != OR_TMP0) {
1390 /* Lock prefix when destination is not memory */
1391 gen_illegal_opcode(s1);
1392 return;
1393 }
1394 tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1395 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1396 s1->mem_index, ot | MO_LE);
1397 } else {
1398 if (d != OR_TMP0) {
1399 gen_op_mov_v_reg(s1, ot, s1->T0, d);
1400 } else {
1401 gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1402 }
1403 tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1404 gen_op_st_rm_T0_A0(s1, ot, d);
1405 }
1406
1407 gen_compute_eflags_c(s1, cpu_cc_src);
1408 tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1409 set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1410 }
1411
1412 static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
1413 TCGv shm1, TCGv count, bool is_right)
1414 {
1415 TCGv_i32 z32, s32, oldop;
1416 TCGv z_tl;
1417
1418 /* Store the results into the CC variables. If we know that the
1419 variable must be dead, store unconditionally. Otherwise we
1420 must not disrupt the current contents. */
1421 z_tl = tcg_const_tl(0);
1422 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1423 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1424 result, cpu_cc_dst);
1425 } else {
1426 tcg_gen_mov_tl(cpu_cc_dst, result);
1427 }
1428 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1429 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1430 shm1, cpu_cc_src);
1431 } else {
1432 tcg_gen_mov_tl(cpu_cc_src, shm1);
1433 }
1434 tcg_temp_free(z_tl);
1435
1436 /* Get the two potential CC_OP values into temporaries. */
1437 tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1438 if (s->cc_op == CC_OP_DYNAMIC) {
1439 oldop = cpu_cc_op;
1440 } else {
1441 tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
1442 oldop = s->tmp3_i32;
1443 }
1444
1445 /* Conditionally store the CC_OP value. */
1446 z32 = tcg_const_i32(0);
1447 s32 = tcg_temp_new_i32();
1448 tcg_gen_trunc_tl_i32(s32, count);
1449 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
1450 tcg_temp_free_i32(z32);
1451 tcg_temp_free_i32(s32);
1452
1453 /* The CC_OP value is no longer predictable. */
1454 set_cc_op(s, CC_OP_DYNAMIC);
1455 }
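/* Architecturally a shift by zero leaves the flags unchanged, so the
   movcond operations above keep the previous CC_DST/CC_SRC/CC_OP
   whenever the (masked) count is zero; the resulting CC_OP is therefore
   only known at run time, hence CC_OP_DYNAMIC. */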
1456
1457 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1458 int is_right, int is_arith)
1459 {
1460 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1461
1462 /* load */
1463 if (op1 == OR_TMP0) {
1464 gen_op_ld_v(s, ot, s->T0, s->A0);
1465 } else {
1466 gen_op_mov_v_reg(s, ot, s->T0, op1);
1467 }
1468
1469 tcg_gen_andi_tl(s->T1, s->T1, mask);
1470 tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1471
1472 if (is_right) {
1473 if (is_arith) {
1474 gen_exts(ot, s->T0);
1475 tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1476 tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1477 } else {
1478 gen_extu(ot, s->T0);
1479 tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1480 tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1481 }
1482 } else {
1483 tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1484 tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1485 }
1486
1487 /* store */
1488 gen_op_st_rm_T0_A0(s, ot, op1);
1489
1490 gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1491 }
1492
1493 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1494 int is_right, int is_arith)
1495 {
1496 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1497
1498 /* load */
1499 if (op1 == OR_TMP0)
1500 gen_op_ld_v(s, ot, s->T0, s->A0);
1501 else
1502 gen_op_mov_v_reg(s, ot, s->T0, op1);
1503
1504 op2 &= mask;
1505 if (op2 != 0) {
1506 if (is_right) {
1507 if (is_arith) {
1508 gen_exts(ot, s->T0);
1509 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1510 tcg_gen_sari_tl(s->T0, s->T0, op2);
1511 } else {
1512 gen_extu(ot, s->T0);
1513 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1514 tcg_gen_shri_tl(s->T0, s->T0, op2);
1515 }
1516 } else {
1517 tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1518 tcg_gen_shli_tl(s->T0, s->T0, op2);
1519 }
1520 }
1521
1522 /* store */
1523 gen_op_st_rm_T0_A0(s, ot, op1);
1524
1525 /* update eflags if non zero shift */
1526 if (op2 != 0) {
1527 tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1528 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1529 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1530 }
1531 }
1532
1533 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1534 {
1535 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1536 TCGv_i32 t0, t1;
1537
1538 /* load */
1539 if (op1 == OR_TMP0) {
1540 gen_op_ld_v(s, ot, s->T0, s->A0);
1541 } else {
1542 gen_op_mov_v_reg(s, ot, s->T0, op1);
1543 }
1544
1545 tcg_gen_andi_tl(s->T1, s->T1, mask);
1546
1547 switch (ot) {
1548 case MO_8:
1549 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1550 tcg_gen_ext8u_tl(s->T0, s->T0);
1551 tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1552 goto do_long;
1553 case MO_16:
1554 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1555 tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1556 goto do_long;
1557 do_long:
1558 #ifdef TARGET_X86_64
1559 case MO_32:
1560 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1561 tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1562 if (is_right) {
1563 tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1564 } else {
1565 tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1566 }
1567 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1568 break;
1569 #endif
1570 default:
1571 if (is_right) {
1572 tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1573 } else {
1574 tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1575 }
1576 break;
1577 }
1578
1579 /* store */
1580 gen_op_st_rm_T0_A0(s, ot, op1);
1581
1582 /* We'll need the flags computed into CC_SRC. */
1583 gen_compute_eflags(s);
1584
1585 /* The value that was "rotated out" is now present at the other end
1586 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1587 since we've computed the flags into CC_SRC, these variables are
1588 currently dead. */
1589 if (is_right) {
1590 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1591 tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1592 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1593 } else {
1594 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1595 tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1596 }
1597 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1598 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1599
1600 /* Now conditionally store the new CC_OP value. If the shift count
1601 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1602 Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1603 exactly as we computed above. */
1604 t0 = tcg_const_i32(0);
1605 t1 = tcg_temp_new_i32();
1606 tcg_gen_trunc_tl_i32(t1, s->T1);
1607 tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1608 tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1609 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1610 s->tmp2_i32, s->tmp3_i32);
1611 tcg_temp_free_i32(t0);
1612 tcg_temp_free_i32(t1);
1613
1614 /* The CC_OP value is no longer predictable. */
1615 set_cc_op(s, CC_OP_DYNAMIC);
1616 }
1617
1618 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1619 int is_right)
1620 {
1621 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1622 int shift;
1623
1624 /* load */
1625 if (op1 == OR_TMP0) {
1626 gen_op_ld_v(s, ot, s->T0, s->A0);
1627 } else {
1628 gen_op_mov_v_reg(s, ot, s->T0, op1);
1629 }
1630
1631 op2 &= mask;
1632 if (op2 != 0) {
1633 switch (ot) {
1634 #ifdef TARGET_X86_64
1635 case MO_32:
1636 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1637 if (is_right) {
1638 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1639 } else {
1640 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1641 }
1642 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1643 break;
1644 #endif
1645 default:
1646 if (is_right) {
1647 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1648 } else {
1649 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1650 }
1651 break;
1652 case MO_8:
1653 mask = 7;
1654 goto do_shifts;
1655 case MO_16:
1656 mask = 15;
1657 do_shifts:
1658 shift = op2 & mask;
1659 if (is_right) {
1660 shift = mask + 1 - shift;
1661 }
1662 gen_extu(ot, s->T0);
1663 tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1664 tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1665 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1666 break;
1667 }
1668 }
1669
1670 /* store */
1671 gen_op_st_rm_T0_A0(s, ot, op1);
1672
1673 if (op2 != 0) {
1674 /* Compute the flags into CC_SRC. */
1675 gen_compute_eflags(s);
1676
1677 /* The value that was "rotated out" is now present at the other end
1678 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1679 since we've computed the flags into CC_SRC, these variables are
1680 currently dead. */
1681 if (is_right) {
1682 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1683 tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1684 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1685 } else {
1686 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1687 tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1688 }
1689 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1690 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1691 set_cc_op(s, CC_OP_ADCOX);
1692 }
1693 }
1694
1695 /* XXX: add faster immediate = 1 case */
1696 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1697 int is_right)
1698 {
1699 gen_compute_eflags(s);
1700 assert(s->cc_op == CC_OP_EFLAGS);
1701
1702 /* load */
1703 if (op1 == OR_TMP0)
1704 gen_op_ld_v(s, ot, s->T0, s->A0);
1705 else
1706 gen_op_mov_v_reg(s, ot, s->T0, op1);
1707
1708 if (is_right) {
1709 switch (ot) {
1710 case MO_8:
1711 gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1712 break;
1713 case MO_16:
1714 gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1715 break;
1716 case MO_32:
1717 gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1718 break;
1719 #ifdef TARGET_X86_64
1720 case MO_64:
1721 gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1722 break;
1723 #endif
1724 default:
1725 tcg_abort();
1726 }
1727 } else {
1728 switch (ot) {
1729 case MO_8:
1730 gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1731 break;
1732 case MO_16:
1733 gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1734 break;
1735 case MO_32:
1736 gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1737 break;
1738 #ifdef TARGET_X86_64
1739 case MO_64:
1740 gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1741 break;
1742 #endif
1743 default:
1744 tcg_abort();
1745 }
1746 }
1747 /* store */
1748 gen_op_st_rm_T0_A0(s, ot, op1);
1749 }
1750
1751 /* XXX: add faster immediate case */
1752 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1753 bool is_right, TCGv count_in)
1754 {
1755 target_ulong mask = (ot == MO_64 ? 63 : 31);
1756 TCGv count;
1757
1758 /* load */
1759 if (op1 == OR_TMP0) {
1760 gen_op_ld_v(s, ot, s->T0, s->A0);
1761 } else {
1762 gen_op_mov_v_reg(s, ot, s->T0, op1);
1763 }
1764
1765 count = tcg_temp_new();
1766 tcg_gen_andi_tl(count, count_in, mask);
1767
1768 switch (ot) {
1769 case MO_16:
1770 /* Note: we implement the Intel behaviour for shift count > 16.
1771 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1772 portion by constructing it as a 32-bit value. */
1773 if (is_right) {
1774 tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1775 tcg_gen_mov_tl(s->T1, s->T0);
1776 tcg_gen_mov_tl(s->T0, s->tmp0);
1777 } else {
1778 tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1779 }
1780 /* FALLTHRU */
1781 #ifdef TARGET_X86_64
1782 case MO_32:
1783 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1784 tcg_gen_subi_tl(s->tmp0, count, 1);
1785 if (is_right) {
1786 tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1787 tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1788 tcg_gen_shr_i64(s->T0, s->T0, count);
1789 } else {
1790 tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1791 tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1792 tcg_gen_shl_i64(s->T0, s->T0, count);
1793 tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1794 tcg_gen_shri_i64(s->T0, s->T0, 32);
1795 }
1796 break;
1797 #endif
1798 default:
1799 tcg_gen_subi_tl(s->tmp0, count, 1);
1800 if (is_right) {
1801 tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1802
1803 tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1804 tcg_gen_shr_tl(s->T0, s->T0, count);
1805 tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1806 } else {
1807 tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1808 if (ot == MO_16) {
1809 /* Only needed if count > 16, for Intel behaviour. */
1810 tcg_gen_subfi_tl(s->tmp4, 33, count);
1811 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1812 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1813 }
1814
1815 tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1816 tcg_gen_shl_tl(s->T0, s->T0, count);
1817 tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1818 }
1819 tcg_gen_movi_tl(s->tmp4, 0);
1820 tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1821 s->tmp4, s->T1);
1822 tcg_gen_or_tl(s->T0, s->T0, s->T1);
1823 break;
1824 }
1825
1826 /* store */
1827 gen_op_st_rm_T0_A0(s, ot, op1);
1828
1829 gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1830 tcg_temp_free(count);
1831 }
1832
1833 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1834 {
1835 if (s != OR_TMP1)
1836 gen_op_mov_v_reg(s1, ot, s1->T1, s);
1837 switch(op) {
1838 case OP_ROL:
1839 gen_rot_rm_T1(s1, ot, d, 0);
1840 break;
1841 case OP_ROR:
1842 gen_rot_rm_T1(s1, ot, d, 1);
1843 break;
1844 case OP_SHL:
1845 case OP_SHL1:
1846 gen_shift_rm_T1(s1, ot, d, 0, 0);
1847 break;
1848 case OP_SHR:
1849 gen_shift_rm_T1(s1, ot, d, 1, 0);
1850 break;
1851 case OP_SAR:
1852 gen_shift_rm_T1(s1, ot, d, 1, 1);
1853 break;
1854 case OP_RCL:
1855 gen_rotc_rm_T1(s1, ot, d, 0);
1856 break;
1857 case OP_RCR:
1858 gen_rotc_rm_T1(s1, ot, d, 1);
1859 break;
1860 }
1861 }
1862
1863 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1864 {
1865 switch(op) {
1866 case OP_ROL:
1867 gen_rot_rm_im(s1, ot, d, c, 0);
1868 break;
1869 case OP_ROR:
1870 gen_rot_rm_im(s1, ot, d, c, 1);
1871 break;
1872 case OP_SHL:
1873 case OP_SHL1:
1874 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1875 break;
1876 case OP_SHR:
1877 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1878 break;
1879 case OP_SAR:
1880 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1881 break;
1882 default:
1883 /* currently not optimized */
1884 tcg_gen_movi_tl(s1->T1, c);
1885 gen_shift(s1, op, ot, d, OR_TMP1);
1886 break;
1887 }
1888 }
1889
1890 #define X86_MAX_INSN_LENGTH 15
1891
1892 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1893 {
1894 uint64_t pc = s->pc;
1895
1896 s->pc += num_bytes;
1897 if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1898 /* If the instruction's 16th byte is on a different page than the 1st, a
1899 * page fault on the second page wins over the general protection fault
1900 * caused by the instruction being too long.
1901 * This can happen even if the operand is only one byte long!
1902 */
1903 if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1904 volatile uint8_t unused =
1905 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1906 (void) unused;
1907 }
1908 siglongjmp(s->jmpbuf, 1);
1909 }
1910
1911 return pc;
1912 }
1913
1914 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1915 {
1916 return translator_ldub(env, advance_pc(env, s, 1));
1917 }
1918
1919 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1920 {
1921 return translator_ldsw(env, advance_pc(env, s, 2));
1922 }
1923
1924 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1925 {
1926 return translator_lduw(env, advance_pc(env, s, 2));
1927 }
1928
1929 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1930 {
1931 return translator_ldl(env, advance_pc(env, s, 4));
1932 }
1933
1934 #ifdef TARGET_X86_64
1935 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1936 {
1937 return translator_ldq(env, advance_pc(env, s, 8));
1938 }
1939 #endif
1940
1941 /* Decompose an address. */
1942
1943 typedef struct AddressParts {
1944 int def_seg;
1945 int base;
1946 int index;
1947 int scale;
1948 target_long disp;
1949 } AddressParts;
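/* Example: "mov %eax, 0x10(%ebx,%esi,4)" decodes to def_seg = R_DS,
   base = R_EBX, index = R_ESI, scale = 2 (log2 of the factor) and
   disp = 0x10. */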
1950
1951 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1952 int modrm)
1953 {
1954 int def_seg, base, index, scale, mod, rm;
1955 target_long disp;
1956 bool havesib;
1957
1958 def_seg = R_DS;
1959 index = -1;
1960 scale = 0;
1961 disp = 0;
1962
1963 mod = (modrm >> 6) & 3;
1964 rm = modrm & 7;
1965 base = rm | REX_B(s);
1966
1967 if (mod == 3) {
1968 /* Normally filtered out earlier, but including this path
1969 simplifies multi-byte nop, as well as bndcl, bndcu, bndcn. */
1970 goto done;
1971 }
1972
1973 switch (s->aflag) {
1974 case MO_64:
1975 case MO_32:
1976 havesib = 0;
1977 if (rm == 4) {
1978 int code = x86_ldub_code(env, s);
1979 scale = (code >> 6) & 3;
1980 index = ((code >> 3) & 7) | REX_X(s);
1981 if (index == 4) {
1982 index = -1; /* no index */
1983 }
1984 base = (code & 7) | REX_B(s);
1985 havesib = 1;
1986 }
1987
1988 switch (mod) {
1989 case 0:
1990 if ((base & 7) == 5) {
1991 base = -1;
1992 disp = (int32_t)x86_ldl_code(env, s);
1993 if (CODE64(s) && !havesib) {
1994 base = -2;
1995 disp += s->pc + s->rip_offset;
1996 }
1997 }
1998 break;
1999 case 1:
2000 disp = (int8_t)x86_ldub_code(env, s);
2001 break;
2002 default:
2003 case 2:
2004 disp = (int32_t)x86_ldl_code(env, s);
2005 break;
2006 }
2007
2008 /* For correct popl handling with esp. */
2009 if (base == R_ESP && s->popl_esp_hack) {
2010 disp += s->popl_esp_hack;
2011 }
2012 if (base == R_EBP || base == R_ESP) {
2013 def_seg = R_SS;
2014 }
2015 break;
2016
2017 case MO_16:
2018 if (mod == 0) {
2019 if (rm == 6) {
2020 base = -1;
2021 disp = x86_lduw_code(env, s);
2022 break;
2023 }
2024 } else if (mod == 1) {
2025 disp = (int8_t)x86_ldub_code(env, s);
2026 } else {
2027 disp = (int16_t)x86_lduw_code(env, s);
2028 }
2029
2030 switch (rm) {
2031 case 0:
2032 base = R_EBX;
2033 index = R_ESI;
2034 break;
2035 case 1:
2036 base = R_EBX;
2037 index = R_EDI;
2038 break;
2039 case 2:
2040 base = R_EBP;
2041 index = R_ESI;
2042 def_seg = R_SS;
2043 break;
2044 case 3:
2045 base = R_EBP;
2046 index = R_EDI;
2047 def_seg = R_SS;
2048 break;
2049 case 4:
2050 base = R_ESI;
2051 break;
2052 case 5:
2053 base = R_EDI;
2054 break;
2055 case 6:
2056 base = R_EBP;
2057 def_seg = R_SS;
2058 break;
2059 default:
2060 case 7:
2061 base = R_EBX;
2062 break;
2063 }
2064 break;
2065
2066 default:
2067 tcg_abort();
2068 }
2069
2070 done:
2071 return (AddressParts){ def_seg, base, index, scale, disp };
2072 }
2073
2074 /* Compute the address, with a minimum number of TCG ops. */
2075 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2076 {
2077 TCGv ea = NULL;
2078
2079 if (a.index >= 0) {
2080 if (a.scale == 0) {
2081 ea = cpu_regs[a.index];
2082 } else {
2083 tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2084 ea = s->A0;
2085 }
2086 if (a.base >= 0) {
2087 tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2088 ea = s->A0;
2089 }
2090 } else if (a.base >= 0) {
2091 ea = cpu_regs[a.base];
2092 }
2093 if (!ea) {
2094 tcg_gen_movi_tl(s->A0, a.disp);
2095 ea = s->A0;
2096 } else if (a.disp != 0) {
2097 tcg_gen_addi_tl(s->A0, ea, a.disp);
2098 ea = s->A0;
2099 }
2100
2101 return ea;
2102 }
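/*
 * Illustrative op counts for a few AddressParts values:
 *   { base = R_EAX, index = -1, disp = 0 }: returns cpu_regs[R_EAX],
 *     no TCG ops emitted;
 *   { base = R_EBX, index = R_ESI, scale = 0, disp = 0 }: a single add
 *     into s->A0;
 *   { base = -1, index = -1, disp = D }: a single movi of D into s->A0.
 */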
2103
2104 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2105 {
2106 AddressParts a = gen_lea_modrm_0(env, s, modrm);
2107 TCGv ea = gen_lea_modrm_1(s, a);
2108 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2109 }
2110
2111 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2112 {
2113 (void)gen_lea_modrm_0(env, s, modrm);
2114 }
2115
2116 /* Used for BNDCL, BNDCU, BNDCN. */
2117 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2118 TCGCond cond, TCGv_i64 bndv)
2119 {
2120 TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2121
2122 tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2123 if (!CODE64(s)) {
2124 tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2125 }
2126 tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2127 tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2128 gen_helper_bndck(cpu_env, s->tmp2_i32);
2129 }
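/*
 * Informal note: the callers choose COND so that the setcond result is
 * non-zero exactly when the bound check fails; gen_helper_bndck is then
 * expected to raise #BR in that case and to do nothing otherwise.
 */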
2130
2131 /* used for LEA and MOV AX, mem */
2132 static void gen_add_A0_ds_seg(DisasContext *s)
2133 {
2134 gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2135 }
2136
2137 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2138 OR_TMP0 */
2139 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2140 MemOp ot, int reg, int is_store)
2141 {
2142 int mod, rm;
2143
2144 mod = (modrm >> 6) & 3;
2145 rm = (modrm & 7) | REX_B(s);
2146 if (mod == 3) {
2147 if (is_store) {
2148 if (reg != OR_TMP0)
2149 gen_op_mov_v_reg(s, ot, s->T0, reg);
2150 gen_op_mov_reg_v(s, ot, rm, s->T0);
2151 } else {
2152 gen_op_mov_v_reg(s, ot, s->T0, rm);
2153 if (reg != OR_TMP0)
2154 gen_op_mov_reg_v(s, ot, reg, s->T0);
2155 }
2156 } else {
2157 gen_lea_modrm(env, s, modrm);
2158 if (is_store) {
2159 if (reg != OR_TMP0)
2160 gen_op_mov_v_reg(s, ot, s->T0, reg);
2161 gen_op_st_v(s, ot, s->T0, s->A0);
2162 } else {
2163 gen_op_ld_v(s, ot, s->T0, s->A0);
2164 if (reg != OR_TMP0)
2165 gen_op_mov_reg_v(s, ot, reg, s->T0);
2166 }
2167 }
2168 }
2169
2170 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2171 {
2172 uint32_t ret;
2173
2174 switch (ot) {
2175 case MO_8:
2176 ret = x86_ldub_code(env, s);
2177 break;
2178 case MO_16:
2179 ret = x86_lduw_code(env, s);
2180 break;
2181 case MO_32:
2182 #ifdef TARGET_X86_64
2183 case MO_64:
2184 #endif
2185 ret = x86_ldl_code(env, s);
2186 break;
2187 default:
2188 tcg_abort();
2189 }
2190 return ret;
2191 }
2192
2193 static inline int insn_const_size(MemOp ot)
2194 {
2195 if (ot <= MO_32) {
2196 return 1 << ot;
2197 } else {
2198 return 4;
2199 }
2200 }
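/*
 * Reminder: apart from MOV r64, imm64 (which is handled separately with
 * x86_ldq_code), x86 immediates are at most 32 bits even at 64-bit
 * operand size, which is why insn_get() fetches only 4 bytes for MO_64
 * and insn_const_size() likewise caps the size at 4.
 */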
2201
2202 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2203 {
2204 #ifndef CONFIG_USER_ONLY
2205 return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2206 (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2207 #else
2208 return true;
2209 #endif
2210 }
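/*
 * Rationale (informal): direct TB chaining is only used when the target
 * stays on a page already associated with this TB (the TB's start page
 * or the current instruction's page), so page-level invalidation still
 * covers the chained jump; other targets take the indirect path in
 * gen_goto_tb() below.
 */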
2211
2212 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2213 {
2214 target_ulong pc = s->cs_base + eip;
2215
2216 if (use_goto_tb(s, pc)) {
2217 /* jump to same page: we can use a direct jump */
2218 tcg_gen_goto_tb(tb_num);
2219 gen_jmp_im(s, eip);
2220 tcg_gen_exit_tb(s->base.tb, tb_num);
2221 s->base.is_jmp = DISAS_NORETURN;
2222 } else {
2223 /* jump to another page */
2224 gen_jmp_im(s, eip);
2225 gen_jr(s, s->tmp0);
2226 }
2227 }
2228
2229 static inline void gen_jcc(DisasContext *s, int b,
2230 target_ulong val, target_ulong next_eip)
2231 {
2232 TCGLabel *l1, *l2;
2233
2234 if (s->jmp_opt) {
2235 l1 = gen_new_label();
2236 gen_jcc1(s, b, l1);
2237
2238 gen_goto_tb(s, 0, next_eip);
2239
2240 gen_set_label(l1);
2241 gen_goto_tb(s, 1, val);
2242 } else {
2243 l1 = gen_new_label();
2244 l2 = gen_new_label();
2245 gen_jcc1(s, b, l1);
2246
2247 gen_jmp_im(s, next_eip);
2248 tcg_gen_br(l2);
2249
2250 gen_set_label(l1);
2251 gen_jmp_im(s, val);
2252 gen_set_label(l2);
2253 gen_eob(s);
2254 }
2255 }
2256
2257 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2258 int modrm, int reg)
2259 {
2260 CCPrepare cc;
2261
2262 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2263
2264 cc = gen_prepare_cc(s, b, s->T1);
2265 if (cc.mask != -1) {
2266 TCGv t0 = tcg_temp_new();
2267 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2268 cc.reg = t0;
2269 }
2270 if (!cc.use_reg2) {
2271 cc.reg2 = tcg_const_tl(cc.imm);
2272 }
2273
2274 tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2275 s->T0, cpu_regs[reg]);
2276 gen_op_mov_reg_v(s, ot, reg, s->T0);
2277
2278 if (cc.mask != -1) {
2279 tcg_temp_free(cc.reg);
2280 }
2281 if (!cc.use_reg2) {
2282 tcg_temp_free(cc.reg2);
2283 }
2284 }
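/*
 * Note (informal): the destination register is written unconditionally
 * via movcond, with its old value selected when the condition is false.
 * Among other things this matches the 64-bit rule that a 32-bit CMOVcc
 * zero-extends the destination even when no move takes place.
 */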
2285
2286 static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg)
2287 {
2288 tcg_gen_ld32u_tl(s->T0, cpu_env,
2289 offsetof(CPUX86State,segs[seg_reg].selector));
2290 }
2291
2292 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg)
2293 {
2294 tcg_gen_ext16u_tl(s->T0, s->T0);
2295 tcg_gen_st32_tl(s->T0, cpu_env,
2296 offsetof(CPUX86State,segs[seg_reg].selector));
2297 tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2298 }
2299
2300 /* move T0 to seg_reg and compute if the CPU state may change. Never
2301 call this function with seg_reg == R_CS */
2302 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2303 {
2304 if (s->pe && !s->vm86) {
2305 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2306 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2307 /* abort translation because the addseg value may change or
2308 because ss32 may change. For R_SS, translation must always
2309 stop as a special handling must be done to disable hardware
2310 interrupts for the next instruction */
2311 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2312 s->base.is_jmp = DISAS_TOO_MANY;
2313 }
2314 } else {
2315 gen_op_movl_seg_T0_vm(s, seg_reg);
2316 if (seg_reg == R_SS) {
2317 s->base.is_jmp = DISAS_TOO_MANY;
2318 }
2319 }
2320 }
2321
2322 static inline int svm_is_rep(int prefixes)
2323 {
2324 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2325 }
2326
2327 static inline void
2328 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2329 uint32_t type, uint64_t param)
2330 {
2331 /* no SVM activated; fast case */
2332 if (likely(!(s->flags & HF_GUEST_MASK)))
2333 return;
2334 gen_update_cc_op(s);
2335 gen_jmp_im(s, pc_start - s->cs_base);
2336 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2337 tcg_const_i64(param));
2338 }
2339
2340 static inline void
2341 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2342 {
2343 gen_svm_check_intercept_param(s, pc_start, type, 0);
2344 }
2345
2346 static inline void gen_stack_update(DisasContext *s, int addend)
2347 {
2348 gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2349 }
2350
2351 /* Generate a push. It depends on ss32, addseg and dflag. */
2352 static void gen_push_v(DisasContext *s, TCGv val)
2353 {
2354 MemOp d_ot = mo_pushpop(s, s->dflag);
2355 MemOp a_ot = mo_stacksize(s);
2356 int size = 1 << d_ot;
2357 TCGv new_esp = s->A0;
2358
2359 tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2360
2361 if (!CODE64(s)) {
2362 if (s->addseg) {
2363 new_esp = s->tmp4;
2364 tcg_gen_mov_tl(new_esp, s->A0);
2365 }
2366 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2367 }
2368
2369 gen_op_st_v(s, d_ot, val, s->A0);
2370 gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2371 }
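/*
 * Note (informal): the store is performed before ESP is updated, so a
 * faulting push leaves ESP unchanged; new_esp only gets a separate copy
 * (s->tmp4) when the segment-base addition would otherwise clobber the
 * value held in s->A0.
 */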
2372
2373 /* A two-step pop is necessary for precise exceptions. */
2374 static MemOp gen_pop_T0(DisasContext *s)
2375 {
2376 MemOp d_ot = mo_pushpop(s, s->dflag);
2377
2378 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2379 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2380
2381 return d_ot;
2382 }
2383
2384 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2385 {
2386 gen_stack_update(s, 1 << ot);
2387 }
2388
2389 static inline void gen_stack_A0(DisasContext *s)
2390 {
2391 gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2392 }
2393
2394 static void gen_pusha(DisasContext *s)
2395 {
2396 MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2397 MemOp d_ot = s->dflag;
2398 int size = 1 << d_ot;
2399 int i;
2400
2401 for (i = 0; i < 8; i++) {
2402 tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2403 gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2404 gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2405 }
2406
2407 gen_stack_update(s, -8 * size);
2408 }
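/*
 * Illustrative layout: the loop stores cpu_regs[7 - i], i.e. EDI at the
 * lowest address and EAX at the highest, reproducing the PUSHA order
 * EAX, ECX, EDX, EBX, original ESP, EBP, ESI, EDI from the top of the
 * new stack area downwards.
 */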
2409
2410 static void gen_popa(DisasContext *s)
2411 {
2412 MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2413 MemOp d_ot = s->dflag;
2414 int size = 1 << d_ot;
2415 int i;
2416
2417 for (i = 0; i < 8; i++) {
2418 /* ESP is not reloaded */
2419 if (7 - i == R_ESP) {
2420 continue;
2421 }
2422 tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2423 gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2424 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2425 gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2426 }
2427
2428 gen_stack_update(s, 8 * size);
2429 }
2430
2431 static void gen_enter(DisasContext *s, int esp_addend, int level)
2432 {
2433 MemOp d_ot = mo_pushpop(s, s->dflag);
2434 MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2435 int size = 1 << d_ot;
2436
2437 /* Push BP; compute FrameTemp into T1. */
2438 tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2439 gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2440 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2441
2442 level &= 31;
2443 if (level != 0) {
2444 int i;
2445
2446 /* Copy level-1 pointers from the previous frame. */
2447 for (i = 1; i < level; ++i) {
2448 tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2449 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2450 gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2451
2452 tcg_gen_subi_tl(s->A0, s->T1, size * i);
2453 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2454 gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2455 }
2456
2457 /* Push the current FrameTemp as the last level. */
2458 tcg_gen_subi_tl(s->A0, s->T1, size * level);
2459 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2460 gen_op_st_v(s, d_ot, s->T1, s->A0);
2461 }
2462
2463 /* Copy the FrameTemp value to EBP. */
2464 gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2465
2466 /* Compute the final value of ESP. */
2467 tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2468 gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2469 }
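/*
 * Worked example (illustrative): "enter $8, $0" with a 32-bit stack is
 * equivalent to
 *     push %ebp
 *     mov  %esp, %ebp
 *     sub  $8, %esp
 * i.e. level == 0 skips the display copy entirely, and FrameTemp (T1)
 * is the ESP value left by the initial push of EBP.
 */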
2470
2471 static void gen_leave(DisasContext *s)
2472 {
2473 MemOp d_ot = mo_pushpop(s, s->dflag);
2474 MemOp a_ot = mo_stacksize(s);
2475
2476 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2477 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2478
2479 tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2480
2481 gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2482 gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2483 }
2484
2485 /* Similarly, except that the assumption here is that we don't decode
2486 the instruction at all -- either a missing opcode, an unimplemented
2487 feature, or just a bogus instruction stream. */
2488 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2489 {
2490 gen_illegal_opcode(s);
2491
2492 if (qemu_loglevel_mask(LOG_UNIMP)) {
2493 FILE *logfile = qemu_log_lock();
2494 target_ulong pc = s->pc_start, end = s->pc;
2495
2496 qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2497 for (; pc < end; ++pc) {
2498 qemu_log(" %02x", cpu_ldub_code(env, pc));
2499 }
2500 qemu_log("\n");
2501 qemu_log_unlock(logfile);
2502 }
2503 }
2504
2505 /* an interrupt is different from an exception because of the
2506 privilege checks */
2507 static void gen_interrupt(DisasContext *s, int intno,
2508 target_ulong cur_eip, target_ulong next_eip)
2509 {
2510 gen_update_cc_op(s);
2511 gen_jmp_im(s, cur_eip);
2512 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2513 tcg_const_i32(next_eip - cur_eip));
2514 s->base.is_jmp = DISAS_NORETURN;
2515 }
2516
2517 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2518 {
2519 gen_update_cc_op(s);
2520 gen_jmp_im(s, cur_eip);
2521 gen_helper_debug(cpu_env);
2522 s->base.is_jmp = DISAS_NORETURN;
2523 }
2524
2525 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2526 {
2527 if ((s->flags & mask) == 0) {
2528 TCGv_i32 t = tcg_temp_new_i32();
2529 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2530 tcg_gen_ori_i32(t, t, mask);
2531 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2532 tcg_temp_free_i32(t);
2533 s->flags |= mask;
2534 }
2535 }
2536
2537 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2538 {
2539 if (s->flags & mask) {
2540 TCGv_i32 t = tcg_temp_new_i32();
2541 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2542 tcg_gen_andi_i32(t, t, ~mask);
2543 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2544 tcg_temp_free_i32(t);
2545 s->flags &= ~mask;
2546 }
2547 }
2548
2549 /* Clear BND registers during legacy branches. */
2550 static void gen_bnd_jmp(DisasContext *s)
2551 {
2552 /* Clear the registers only if BND prefix is missing, MPX is enabled,
2553 and if the BNDREGs are known to be in use (non-zero) already.
2554 The helper itself will check BNDPRESERVE at runtime. */
2555 if ((s->prefix & PREFIX_REPNZ) == 0
2556 && (s->flags & HF_MPX_EN_MASK) != 0
2557 && (s->flags & HF_MPX_IU_MASK) != 0) {
2558 gen_helper_bnd_jmp(cpu_env);
2559 }
2560 }
2561
2562 /* Generate an end of block. Trace exception is also generated if needed.
2563 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2564 If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2565 S->TF. This is used by the syscall/sysret insns. */
2566 static void
2567 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2568 {
2569 gen_update_cc_op(s);
2570
2571 /* If several instructions disable interrupts, only the first does it. */
2572 if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2573 gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2574 } else {
2575 gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2576 }
2577
2578 if (s->base.tb->flags & HF_RF_MASK) {
2579 gen_helper_reset_rf(cpu_env);
2580 }
2581 if (s->base.singlestep_enabled) {
2582 gen_helper_debug(cpu_env);
2583 } else if (recheck_tf) {
2584 gen_helper_rechecking_single_step(cpu_env);
2585 tcg_gen_exit_tb(NULL, 0);
2586 } else if (s->tf) {
2587 gen_helper_single_step(cpu_env);
2588 } else if (jr) {
2589 tcg_gen_lookup_and_goto_ptr();
2590 } else {
2591 tcg_gen_exit_tb(NULL, 0);
2592 }
2593 s->base.is_jmp = DISAS_NORETURN;
2594 }
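/*
 * Informal note on the exit priority above: singlestep_enabled wins over
 * everything, then the #DB re-check used by syscall/sysret, then ordinary
 * TF single-stepping; only when none of these apply do we use the faster
 * lookup-and-goto-ptr exit for indirect jumps, or a plain exit_tb.
 */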
2595
2596 static inline void
2597 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2598 {
2599 do_gen_eob_worker(s, inhibit, recheck_tf, false);
2600 }
2601
2602 /* End of block.
2603 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
2604 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2605 {
2606 gen_eob_worker(s, inhibit, false);
2607 }
2608
2609 /* End of block, resetting the inhibit irq flag. */
2610 static void gen_eob(DisasContext *s)
2611 {
2612 gen_eob_worker(s, false, false);
2613 }
2614
2615 /* Jump to register */
2616 static void gen_jr(DisasContext *s, TCGv dest)
2617 {
2618 do_gen_eob_worker(s, false, false, true);
2619 }
2620
2621 /* Generate a jump to eip.  No segment change may happen before this, as a
2622    direct jump to the next block (TB chaining) may occur. */
2623 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2624 {
2625 gen_update_cc_op(s);
2626 set_cc_op(s, CC_OP_DYNAMIC);
2627 if (s->jmp_opt) {
2628 gen_goto_tb(s, tb_num, eip);
2629 } else {
2630 gen_jmp_im(s, eip);
2631 gen_eob(s);
2632 }
2633 }
2634
2635 static void gen_jmp(DisasContext *s, target_ulong eip)
2636 {
2637 gen_jmp_tb(s, eip, 0);
2638 }
2639
2640 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2641 {
2642 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2643 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2644 }
2645
2646 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2647 {
2648 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2649 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2650 }
2651
2652 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2653 {
2654 int mem_index = s->mem_index;
2655 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2656 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2657 tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2658 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2659 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2660 }
2661
2662 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2663 {
2664 int mem_index = s->mem_index;
2665 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2666 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2667 tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2668 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2669 tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2670 }
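/*
 * Note: the 128-bit XMM load/store above is split into two little-endian
 * 64-bit accesses, so it provides no atomicity beyond that of the
 * individual 8-byte accesses.
 */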
2671
2672 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2673 {
2674 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2675 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2676 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2677 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2678 }
2679
2680 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2681 {
2682 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2683 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2684 }
2685
2686 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2687 {
2688 tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2689 tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2690 }
2691
2692 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2693 {
2694 tcg_gen_movi_i64(s->tmp1_i64, 0);
2695 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2696 }
2697
2698 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2699 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2700 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2701 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2702 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2703 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2704 TCGv_i32 val);
2705 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2706 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2707 TCGv val);
2708
2709 #define SSE_SPECIAL ((void *)1)
2710 #define SSE_DUMMY ((void *)2)
2711
2712 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2713 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2714 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2715
2716 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2717 /* 3DNow! extensions */
2718 [0x0e] = { SSE_DUMMY }, /* femms */
2719 [0x0f] = { SSE_DUMMY }, /* pf... */
2720 /* pure SSE operations */
2721 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2722 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2723 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2724 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2725 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2726 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2727 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2728 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2729
2730 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2731 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2732 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2733 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2734 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2735 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2736 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2737 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2738 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2739 [0x51] = SSE_FOP(sqrt),
2740 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2741 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2742 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2743 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2744 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2745 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2746 [0x58] = SSE_FOP(add),
2747 [0x59] = SSE_FOP(mul),
2748 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2749 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2750 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2751 [0x5c] = SSE_FOP(sub),
2752 [0x5d] = SSE_FOP(min),
2753 [0x5e] = SSE_FOP(div),
2754 [0x5f] = SSE_FOP(max),
2755
2756 [0xc2] = SSE_FOP(cmpeq),
2757 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2758 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2759
2760 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2761 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2762 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2763
2764 /* MMX ops and their SSE extensions */
2765 [0x60] = MMX_OP2(punpcklbw),
2766 [0x61] = MMX_OP2(punpcklwd),
2767 [0x62] = MMX_OP2(punpckldq),
2768 [0x63] = MMX_OP2(packsswb),
2769 [0x64] = MMX_OP2(pcmpgtb),
2770 [0x65] = MMX_OP2(pcmpgtw),
2771 [0x66] = MMX_OP2(pcmpgtl),
2772 [0x67] = MMX_OP2(packuswb),
2773 [0x68] = MMX_OP2(punpckhbw),
2774 [0x69] = MMX_OP2(punpckhwd),
2775 [0x6a] = MMX_OP2(punpckhdq),
2776 [0x6b] = MMX_OP2(packssdw),
2777 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2778 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2779 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2780 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2781 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2782 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2783 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2784 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2785 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2786 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2787 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2788 [0x74] = MMX_OP2(pcmpeqb),
2789 [0x75] = MMX_OP2(pcmpeqw),
2790 [0x76] = MMX_OP2(pcmpeql),
2791 [0x77] = { SSE_DUMMY }, /* emms */
2792 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2793 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2794 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2795 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2796 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2797 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2798 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2799 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2800 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2801 [0xd1] = MMX_OP2(psrlw),
2802 [0xd2] = MMX_OP2(psrld),
2803 [0xd3] = MMX_OP2(psrlq),
2804 [0xd4] = MMX_OP2(paddq),
2805 [0xd5] = MMX_OP2(pmullw),
2806 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2807 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2808 [0xd8] = MMX_OP2(psubusb),
2809 [0xd9] = MMX_OP2(psubusw),
2810 [0xda] = MMX_OP2(pminub),
2811 [0xdb] = MMX_OP2(pand),
2812 [0xdc] = MMX_OP2(paddusb),
2813 [0xdd] = MMX_OP2(paddusw),
2814 [0xde] = MMX_OP2(pmaxub),
2815 [0xdf] = MMX_OP2(pandn),
2816 [0xe0] = MMX_OP2(pavgb),
2817 [0xe1] = MMX_OP2(psraw),
2818 [0xe2] = MMX_OP2(psrad),
2819 [0xe3] = MMX_OP2(pavgw),
2820 [0xe4] = MMX_OP2(pmulhuw),
2821 [0xe5] = MMX_OP2(pmulhw),
2822 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2823 [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
2824 [0xe8] = MMX_OP2(psubsb),
2825 [0xe9] = MMX_OP2(psubsw),
2826 [0xea] = MMX_OP2(pminsw),
2827 [0xeb] = MMX_OP2(por),
2828 [0xec] = MMX_OP2(paddsb),
2829 [0xed] = MMX_OP2(paddsw),
2830 [0xee] = MMX_OP2(pmaxsw),
2831 [0xef] = MMX_OP2(pxor),
2832 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2833 [0xf1] = MMX_OP2(psllw),
2834 [0xf2] = MMX_OP2(pslld),
2835 [0xf3] = MMX_OP2(psllq),
2836 [0xf4] = MMX_OP2(pmuludq),
2837 [0xf5] = MMX_OP2(pmaddwd),
2838 [0xf6] = MMX_OP2(psadbw),
2839 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2840 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2841 [0xf8] = MMX_OP2(psubb),
2842 [0xf9] = MMX_OP2(psubw),
2843 [0xfa] = MMX_OP2(psubl),
2844 [0xfb] = MMX_OP2(psubq),
2845 [0xfc] = MMX_OP2(paddb),
2846 [0xfd] = MMX_OP2(paddw),
2847 [0xfe] = MMX_OP2(paddl),
2848 };
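/*
 * Informal reading of the table: the second index is derived from the
 * mandatory prefix (0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2); see the b1
 * computation in gen_sse().  For example, 0F 58 with a 0x66 prefix
 * selects column 1 of SSE_FOP(add), i.e. gen_helper_addpd.
 */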
2849
2850 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2851 [0 + 2] = MMX_OP2(psrlw),
2852 [0 + 4] = MMX_OP2(psraw),
2853 [0 + 6] = MMX_OP2(psllw),
2854 [8 + 2] = MMX_OP2(psrld),
2855 [8 + 4] = MMX_OP2(psrad),
2856 [8 + 6] = MMX_OP2(pslld),
2857 [16 + 2] = MMX_OP2(psrlq),
2858 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2859 [16 + 6] = MMX_OP2(psllq),
2860 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2861 };
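/*
 * Informal note: this table serves the 0F 71/72/73 shift-by-immediate
 * groups.  Rows 0..7 are the word forms, 8..15 the doubleword forms and
 * 16..23 the quadword forms; within a group the modrm reg field selects
 * the operation (2 = logical right, 4 = arithmetic right, 6 = left,
 * with 3 and 7 being the xmm-only psrldq/pslldq byte shifts).
 */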
2862
2863 static const SSEFunc_0_epi sse_op_table3ai[] = {
2864 gen_helper_cvtsi2ss,
2865 gen_helper_cvtsi2sd
2866 };
2867
2868 #ifdef TARGET_X86_64
2869 static const SSEFunc_0_epl sse_op_table3aq[] = {
2870 gen_helper_cvtsq2ss,
2871 gen_helper_cvtsq2sd
2872 };
2873 #endif
2874
2875 static const SSEFunc_i_ep sse_op_table3bi[] = {
2876 gen_helper_cvttss2si,
2877 gen_helper_cvtss2si,
2878 gen_helper_cvttsd2si,
2879 gen_helper_cvtsd2si
2880 };
2881
2882 #ifdef TARGET_X86_64
2883 static const SSEFunc_l_ep sse_op_table3bq[] = {
2884 gen_helper_cvttss2sq,
2885 gen_helper_cvtss2sq,
2886 gen_helper_cvttsd2sq,
2887 gen_helper_cvtsd2sq
2888 };
2889 #endif
2890
2891 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2892 SSE_FOP(cmpeq),
2893 SSE_FOP(cmplt),
2894 SSE_FOP(cmple),
2895 SSE_FOP(cmpunord),
2896 SSE_FOP(cmpneq),
2897 SSE_FOP(cmpnlt),
2898 SSE_FOP(cmpnle),
2899 SSE_FOP(cmpord),
2900 };
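/*
 * Informal note: used for CMPccPS/PD/SS/SD (0F C2); the row is selected
 * by the immediate predicate byte (values 0..7).
 */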
2901
2902 static const SSEFunc_0_epp sse_op_table5[256] = {
2903 [0x0c] = gen_helper_pi2fw,
2904 [0x0d] = gen_helper_pi2fd,
2905 [0x1c] = gen_helper_pf2iw,
2906 [0x1d] = gen_helper_pf2id,
2907 [0x8a] = gen_helper_pfnacc,
2908 [0x8e] = gen_helper_pfpnacc,
2909 [0x90] = gen_helper_pfcmpge,
2910 [0x94] = gen_helper_pfmin,
2911 [0x96] = gen_helper_pfrcp,
2912 [0x97] = gen_helper_pfrsqrt,
2913 [0x9a] = gen_helper_pfsub,
2914 [0x9e] = gen_helper_pfadd,
2915 [0xa0] = gen_helper_pfcmpgt,
2916 [0xa4] = gen_helper_pfmax,
2917 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2918 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2919 [0xaa] = gen_helper_pfsubr,
2920 [0xae] = gen_helper_pfacc,
2921 [0xb0] = gen_helper_pfcmpeq,
2922 [0xb4] = gen_helper_pfmul,
2923 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2924 [0xb7] = gen_helper_pmulhrw_mmx,
2925 [0xbb] = gen_helper_pswapd,
2926 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2927 };
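/*
 * Informal note: 3DNow! instructions are encoded as 0F 0F /r ib, and this
 * table is indexed by that trailing opcode byte.
 */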
2928
2929 struct SSEOpHelper_epp {
2930 SSEFunc_0_epp op[2];
2931 uint32_t ext_mask;
2932 };
2933
2934 struct SSEOpHelper_eppi {
2935 SSEFunc_0_eppi op[2];
2936 uint32_t ext_mask;
2937 };
2938
2939 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2940 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2941 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2942 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2943 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2944 CPUID_EXT_PCLMULQDQ }
2945 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2946
2947 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2948 [0x00] = SSSE3_OP(pshufb),
2949 [0x01] = SSSE3_OP(phaddw),
2950 [0x02] = SSSE3_OP(phaddd),
2951 [0x03] = SSSE3_OP(phaddsw),
2952 [0x04] = SSSE3_OP(pmaddubsw),
2953 [0x05] = SSSE3_OP(phsubw),
2954 [0x06] = SSSE3_OP(phsubd),
2955 [0x07] = SSSE3_OP(phsubsw),
2956 [0x08] = SSSE3_OP(psignb),
2957 [0x09] = SSSE3_OP(psignw),
2958 [0x0a] = SSSE3_OP(psignd),
2959 [0x0b] = SSSE3_OP(pmulhrsw),
2960 [0x10] = SSE41_OP(pblendvb),
2961 [0x14] = SSE41_OP(blendvps),
2962 [0x15] = SSE41_OP(blendvpd),
2963 [0x17] = SSE41_OP(ptest),
2964 [0x1c] = SSSE3_OP(pabsb),
2965 [0x1d] = SSSE3_OP(pabsw),
2966 [0x1e] = SSSE3_OP(pabsd),
2967 [0x20] = SSE41_OP(pmovsxbw),
2968 [0x21] = SSE41_OP(pmovsxbd),
2969 [0x22] = SSE41_OP(pmovsxbq),
2970 [0x23] = SSE41_OP(pmovsxwd),
2971 [0x24] = SSE41_OP(pmovsxwq),
2972 [0x25] = SSE41_OP(pmovsxdq),
2973 [0x28] = SSE41_OP(pmuldq),
2974 [0x29] = SSE41_OP(pcmpeqq),
2975 [0x2a] = SSE41_SPECIAL, /* movntdqa */
2976 [0x2b] = SSE41_OP(packusdw),
2977 [0x30] = SSE41_OP(pmovzxbw),
2978 [0x31] = SSE41_OP(pmovzxbd),
2979 [0x32] = SSE41_OP(pmovzxbq),
2980 [0x33] = SSE41_OP(pmovzxwd),
2981 [0x34] = SSE41_OP(pmovzxwq),
2982 [0x35] = SSE41_OP(pmovzxdq),
2983 [0x37] = SSE42_OP(pcmpgtq),
2984 [0x38] = SSE41_OP(pminsb),
2985 [0x39] = SSE41_OP(pminsd),
2986 [0x3a] = SSE41_OP(pminuw),
2987 [0x3b] = SSE41_OP(pminud),
2988 [0x3c] = SSE41_OP(pmaxsb),
2989 [0x3d] = SSE41_OP(pmaxsd),
2990 [0x3e] = SSE41_OP(pmaxuw),
2991 [0x3f] = SSE41_OP(pmaxud),
2992 [0x40] = SSE41_OP(pmulld),
2993 [0x41] = SSE41_OP(phminposuw),
2994 [0xdb] = AESNI_OP(aesimc),
2995 [0xdc] = AESNI_OP(aesenc),
2996 [0xdd] = AESNI_OP(aesenclast),
2997 [0xde] = AESNI_OP(aesdec),
2998 [0xdf] = AESNI_OP(aesdeclast),
2999 };
3000
3001 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3002 [0x08] = SSE41_OP(roundps),
3003 [0x09] = SSE41_OP(roundpd),
3004 [0x0a] = SSE41_OP(roundss),
3005 [0x0b] = SSE41_OP(roundsd),
3006 [0x0c] = SSE41_OP(blendps),
3007 [0x0d] = SSE41_OP(blendpd),
3008 [0x0e] = SSE41_OP(pblendw),
3009 [0x0f] = SSSE3_OP(palignr),
3010 [0x14] = SSE41_SPECIAL, /* pextrb */
3011 [0x15] = SSE41_SPECIAL, /* pextrw */
3012 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3013 [0x17] = SSE41_SPECIAL, /* extractps */
3014 [0x20] = SSE41_SPECIAL, /* pinsrb */
3015 [0x21] = SSE41_SPECIAL, /* insertps */
3016 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3017 [0x40] = SSE41_OP(dpps),
3018 [0x41] = SSE41_OP(dppd),
3019 [0x42] = SSE41_OP(mpsadbw),
3020 [0x44] = PCLMULQDQ_OP(pclmulqdq),
3021 [0x60] = SSE42_OP(pcmpestrm),
3022 [0x61] = SSE42_OP(pcmpestri),
3023 [0x62] = SSE42_OP(pcmpistrm),
3024 [0x63] = SSE42_OP(pcmpistri),
3025 [0xdf] = AESNI_OP(aeskeygenassist),
3026 };
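/*
 * Informal note: sse_op_table6 and sse_op_table7 cover the 0F 38 and
 * 0F 3A three-byte opcode maps respectively (table7 entries take an
 * extra immediate byte), and ext_mask records the CPUID feature bit that
 * must be present for an entry to be accepted.
 */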
3027
3028 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3029 target_ulong pc_start, int rex_r)
3030 {
3031 int b1, op1_offset, op2_offset, is_xmm, val;
3032 int modrm, mod, rm, reg;
3033 SSEFunc_0_epp sse_fn_epp;
3034 SSEFunc_0_eppi sse_fn_eppi;
3035 SSEFunc_0_ppi sse_fn_ppi;
3036 SSEFunc_0_eppt sse_fn_eppt;
3037 MemOp ot;
3038
3039 b &= 0xff;
3040 if (s->prefix & PREFIX_DATA)
3041 b1 = 1;
3042 else if (s->prefix & PREFIX_REPZ)
3043 b1 = 2;
3044 else if (s->prefix & PREFIX_REPNZ)
3045 b1 = 3;
3046 else
3047 b1 = 0;
3048 sse_fn_epp = sse_op_table1[b][b1];
3049 if (!sse_fn_epp) {
3050 goto unknown_op;
3051 }
3052 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3053 is_xmm = 1;
3054 } else {
3055 if (b1 == 0) {
3056 /* MMX case */
3057 is_xmm = 0;
3058 } else {
3059 is_xmm = 1;
3060 }
3061 }
3062 /* simple MMX/SSE operation */
3063 if (s->flags & HF_TS_MASK) {
3064 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3065 return;
3066 }
3067 if (s->flags & HF_EM_MASK) {
3068 illegal_op:
3069 gen_illegal_opcode(s);
3070 return;
3071 }
3072 if (is_xmm
3073 && !(s->flags & HF_OSFXSR_MASK)
3074 && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
3075 goto unknown_op;
3076 }
3077 if (b == 0x0e) {
3078 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3079 /* If we were fully decoding this we might use illegal_op. */
3080 goto unknown_op;
3081 }
3082 /* femms */
3083 gen_helper_emms(cpu_env);
3084 return;
3085 }
3086 if (b == 0x77) {
3087 /* emms */
3088 gen_helper_emms(cpu_env);
3089 return;
3090 }
3091 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3092 the static cpu state) */
3093 if (!is_xmm) {
3094 gen_helper_enter_mmx(cpu_env);
3095 }
3096
3097 modrm = x86_ldub_code(env, s);
3098 reg = ((modrm >> 3) & 7);
3099 if (is_xmm)
3100 reg |= rex_r;
3101 mod = (modrm >> 6) & 3;
3102 if (sse_fn_epp == SSE_SPECIAL) {
3103 b |= (b1 << 8);
3104 switch(b) {
3105 case 0x0e7: /* movntq */
3106 if (mod == 3) {
3107 goto illegal_op;
3108 }
3109 gen_lea_modrm(env, s, modrm);
3110 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3111 break;
3112 case 0x1e7: /* movntdq */
3113 case 0x02b: /* movntps */
3114 case 0x12b: /* movntpd */
3115 if (mod == 3)
3116 goto illegal_op;
3117 gen_lea_modrm(env, s, modrm);
3118 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3119 break;
3120 case 0x3f0: /* lddqu */
3121 if (mod == 3)
3122 goto illegal_op;
3123 gen_lea_modrm(env, s, modrm);
3124 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3125 break;
3126 case 0x22b: /* movntss */
3127 case 0x32b: /* movntsd */
3128 if (mod == 3)
3129 goto illegal_op;
3130 gen_lea_modrm(env, s, modrm);
3131 if (b1 & 1) {
3132 gen_stq_env_A0(s, offsetof(CPUX86State,
3133 xmm_regs[reg].ZMM_Q(0)));
3134 } else {
3135 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3136 xmm_regs[reg].ZMM_L(0)));
3137 gen_op_st_v(s, MO_32, s->T0, s->A0);
3138 }
3139 break;
3140 case 0x6e: /* movd mm, ea */
3141 #ifdef TARGET_X86_64
3142 if (s->dflag == MO_64) {
3143 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3144 tcg_gen_st_tl(s->T0, cpu_env,
3145 offsetof(CPUX86State, fpregs[reg].mmx));
3146 } else
3147 #endif
3148 {
3149 gen_ldst_modrm(env, s