4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
23 #include "disas/disas.h"
25 #include "exec/cpu_ldst.h"
27 #include "exec/helper-proto.h"
28 #include "exec/helper-gen.h"
30 #include "trace-tcg.h"
34 #define PREFIX_REPZ 0x01
35 #define PREFIX_REPNZ 0x02
36 #define PREFIX_LOCK 0x04
37 #define PREFIX_DATA 0x08
38 #define PREFIX_ADR 0x10
39 #define PREFIX_VEX 0x20
42 #define CODE64(s) ((s)->code64)
43 #define REX_X(s) ((s)->rex_x)
44 #define REX_B(s) ((s)->rex_b)
59 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
60 #define CASE_MEM_OP(OP) \
61 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
62 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
63 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
65 //#define MACRO_TEST 1
67 /* global register indexes */
68 static TCGv_ptr cpu_env
;
70 static TCGv cpu_cc_dst
, cpu_cc_src
, cpu_cc_src2
, cpu_cc_srcT
;
71 static TCGv_i32 cpu_cc_op
;
72 static TCGv cpu_regs
[CPU_NB_REGS
];
73 static TCGv cpu_seg_base
[6];
75 static TCGv cpu_T0
, cpu_T1
;
76 /* local register indexes (only used inside old micro ops) */
77 static TCGv cpu_tmp0
, cpu_tmp4
;
78 static TCGv_ptr cpu_ptr0
, cpu_ptr1
;
79 static TCGv_i32 cpu_tmp2_i32
, cpu_tmp3_i32
;
80 static TCGv_i64 cpu_tmp1_i64
;
82 #include "exec/gen-icount.h"
85 static int x86_64_hregs
;
88 typedef struct DisasContext
{
89 /* current insn context */
90 int override
; /* -1 if no override */
94 target_ulong pc
; /* pc = eip + cs_base */
95 int is_jmp
; /* 1 means jump (stop translation), 2 means CPU
96 static state change (stop translation) */
97 /* current block context */
98 target_ulong cs_base
; /* base of CS segment */
99 int pe
; /* protected mode */
100 int code32
; /* 32 bit code segment */
102 int lma
; /* long mode active */
103 int code64
; /* 64 bit code segment */
106 int vex_l
; /* vex vector length */
107 int vex_v
; /* vex vvvv register, without 1's complement. */
108 int ss32
; /* 32 bit stack segment */
109 CCOp cc_op
; /* current CC operation */
111 int addseg
; /* non zero if either DS/ES/SS have a non zero base */
112 int f_st
; /* currently unused */
113 int vm86
; /* vm86 mode */
116 int tf
; /* TF cpu flag */
117 int singlestep_enabled
; /* "hardware" single step enabled */
118 int jmp_opt
; /* use direct block chaining for direct jumps */
119 int repz_opt
; /* optimize jumps within repz instructions */
120 int mem_index
; /* select memory access functions */
121 uint64_t flags
; /* all execution flags */
122 struct TranslationBlock
*tb
;
123 int popl_esp_hack
; /* for correct popl with esp base handling */
124 int rip_offset
; /* only used in x86_64, but left for simplicity */
126 int cpuid_ext_features
;
127 int cpuid_ext2_features
;
128 int cpuid_ext3_features
;
129 int cpuid_7_0_ebx_features
;
132 static void gen_eob(DisasContext
*s
);
133 static void gen_jmp(DisasContext
*s
, target_ulong eip
);
134 static void gen_jmp_tb(DisasContext
*s
, target_ulong eip
, int tb_num
);
135 static void gen_op(DisasContext
*s1
, int op
, TCGMemOp ot
, int d
);
137 /* i386 arith/logic operations */
157 OP_SHL1
, /* undocumented */
173 /* I386 int registers */
174 OR_EAX
, /* MUST be even numbered */
183 OR_TMP0
= 16, /* temporary operand register */
185 OR_A0
, /* temporary register used when doing address evaluation */
195 /* Bit set if the global variable is live after setting CC_OP to X. */
196 static const uint8_t cc_op_live
[CC_OP_NB
] = {
197 [CC_OP_DYNAMIC
] = USES_CC_DST
| USES_CC_SRC
| USES_CC_SRC2
,
198 [CC_OP_EFLAGS
] = USES_CC_SRC
,
199 [CC_OP_MULB
... CC_OP_MULQ
] = USES_CC_DST
| USES_CC_SRC
,
200 [CC_OP_ADDB
... CC_OP_ADDQ
] = USES_CC_DST
| USES_CC_SRC
,
201 [CC_OP_ADCB
... CC_OP_ADCQ
] = USES_CC_DST
| USES_CC_SRC
| USES_CC_SRC2
,
202 [CC_OP_SUBB
... CC_OP_SUBQ
] = USES_CC_DST
| USES_CC_SRC
| USES_CC_SRCT
,
203 [CC_OP_SBBB
... CC_OP_SBBQ
] = USES_CC_DST
| USES_CC_SRC
| USES_CC_SRC2
,
204 [CC_OP_LOGICB
... CC_OP_LOGICQ
] = USES_CC_DST
,
205 [CC_OP_INCB
... CC_OP_INCQ
] = USES_CC_DST
| USES_CC_SRC
,
206 [CC_OP_DECB
... CC_OP_DECQ
] = USES_CC_DST
| USES_CC_SRC
,
207 [CC_OP_SHLB
... CC_OP_SHLQ
] = USES_CC_DST
| USES_CC_SRC
,
208 [CC_OP_SARB
... CC_OP_SARQ
] = USES_CC_DST
| USES_CC_SRC
,
209 [CC_OP_BMILGB
... CC_OP_BMILGQ
] = USES_CC_DST
| USES_CC_SRC
,
210 [CC_OP_ADCX
] = USES_CC_DST
| USES_CC_SRC
,
211 [CC_OP_ADOX
] = USES_CC_SRC
| USES_CC_SRC2
,
212 [CC_OP_ADCOX
] = USES_CC_DST
| USES_CC_SRC
| USES_CC_SRC2
,
216 static void set_cc_op(DisasContext
*s
, CCOp op
)
220 if (s
->cc_op
== op
) {
224 /* Discard CC computation that will no longer be used. */
225 dead
= cc_op_live
[s
->cc_op
] & ~cc_op_live
[op
];
226 if (dead
& USES_CC_DST
) {
227 tcg_gen_discard_tl(cpu_cc_dst
);
229 if (dead
& USES_CC_SRC
) {
230 tcg_gen_discard_tl(cpu_cc_src
);
232 if (dead
& USES_CC_SRC2
) {
233 tcg_gen_discard_tl(cpu_cc_src2
);
235 if (dead
& USES_CC_SRCT
) {
236 tcg_gen_discard_tl(cpu_cc_srcT
);
239 if (op
== CC_OP_DYNAMIC
) {
240 /* The DYNAMIC setting is translator only, and should never be
241 stored. Thus we always consider it clean. */
242 s
->cc_op_dirty
= false;
244 /* Discard any computed CC_OP value (see shifts). */
245 if (s
->cc_op
== CC_OP_DYNAMIC
) {
246 tcg_gen_discard_i32(cpu_cc_op
);
248 s
->cc_op_dirty
= true;
253 static void gen_update_cc_op(DisasContext
*s
)
255 if (s
->cc_op_dirty
) {
256 tcg_gen_movi_i32(cpu_cc_op
, s
->cc_op
);
257 s
->cc_op_dirty
= false;
263 #define NB_OP_SIZES 4
265 #else /* !TARGET_X86_64 */
267 #define NB_OP_SIZES 3
269 #endif /* !TARGET_X86_64 */
271 #if defined(HOST_WORDS_BIGENDIAN)
272 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
273 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
274 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
275 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
276 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
278 #define REG_B_OFFSET 0
279 #define REG_H_OFFSET 1
280 #define REG_W_OFFSET 0
281 #define REG_L_OFFSET 0
282 #define REG_LH_OFFSET 4
285 /* In instruction encodings for byte register accesses the
286 * register number usually indicates "low 8 bits of register N";
287 * however there are some special cases where N 4..7 indicates
288 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
289 * true for this special case, false otherwise.
291 static inline bool byte_reg_is_xH(int reg
)
297 if (reg
>= 8 || x86_64_hregs
) {
304 /* Select the size of a push/pop operation. */
305 static inline TCGMemOp
mo_pushpop(DisasContext
*s
, TCGMemOp ot
)
308 return ot
== MO_16 ? MO_16
: MO_64
;
314 /* Select the size of the stack pointer. */
315 static inline TCGMemOp
mo_stacksize(DisasContext
*s
)
317 return CODE64(s
) ? MO_64
: s
->ss32 ? MO_32
: MO_16
;
320 /* Select size 64 if OT is MO_64, else size 32. Used for SSE operand sizes. */
321 static inline TCGMemOp
mo_64_32(TCGMemOp ot
)
324 return ot
== MO_64 ? MO_64
: MO_32
;
330 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
331 byte vs word opcodes. */
332 static inline TCGMemOp
mo_b_d(int b
, TCGMemOp ot
)
334 return b
& 1 ? ot
: MO_8
;
337 /* Select size 8 if lsb of B is clear, else OT capped at 32.
338 Used for decoding operand size of port opcodes. */
339 static inline TCGMemOp
mo_b_d32(int b
, TCGMemOp ot
)
341 return b
& 1 ?
(ot
== MO_16 ? MO_16
: MO_32
) : MO_8
;
344 static void gen_op_mov_reg_v(TCGMemOp ot
, int reg
, TCGv t0
)
348 if (!byte_reg_is_xH(reg
)) {
349 tcg_gen_deposit_tl(cpu_regs
[reg
], cpu_regs
[reg
], t0
, 0, 8);
351 tcg_gen_deposit_tl(cpu_regs
[reg
- 4], cpu_regs
[reg
- 4], t0
, 8, 8);
355 tcg_gen_deposit_tl(cpu_regs
[reg
], cpu_regs
[reg
], t0
, 0, 16);
358 /* For x86_64, this sets the higher half of register to zero.
359 For i386, this is equivalent to a mov. */
360 tcg_gen_ext32u_tl(cpu_regs
[reg
], t0
);
364 tcg_gen_mov_tl(cpu_regs
[reg
], t0
);
372 static inline void gen_op_mov_v_reg(TCGMemOp ot
, TCGv t0
, int reg
)
374 if (ot
== MO_8
&& byte_reg_is_xH(reg
)) {
375 tcg_gen_shri_tl(t0
, cpu_regs
[reg
- 4], 8);
376 tcg_gen_ext8u_tl(t0
, t0
);
378 tcg_gen_mov_tl(t0
, cpu_regs
[reg
]);
382 static void gen_add_A0_im(DisasContext
*s
, int val
)
384 tcg_gen_addi_tl(cpu_A0
, cpu_A0
, val
);
386 tcg_gen_ext32u_tl(cpu_A0
, cpu_A0
);
390 static inline void gen_op_jmp_v(TCGv dest
)
392 tcg_gen_st_tl(dest
, cpu_env
, offsetof(CPUX86State
, eip
));
395 static inline void gen_op_add_reg_im(TCGMemOp size
, int reg
, int32_t val
)
397 tcg_gen_addi_tl(cpu_tmp0
, cpu_regs
[reg
], val
);
398 gen_op_mov_reg_v(size
, reg
, cpu_tmp0
);
401 static inline void gen_op_add_reg_T0(TCGMemOp size
, int reg
)
403 tcg_gen_add_tl(cpu_tmp0
, cpu_regs
[reg
], cpu_T0
);
404 gen_op_mov_reg_v(size
, reg
, cpu_tmp0
);
407 static inline void gen_op_ld_v(DisasContext
*s
, int idx
, TCGv t0
, TCGv a0
)
409 tcg_gen_qemu_ld_tl(t0
, a0
, s
->mem_index
, idx
| MO_LE
);
412 static inline void gen_op_st_v(DisasContext
*s
, int idx
, TCGv t0
, TCGv a0
)
414 tcg_gen_qemu_st_tl(t0
, a0
, s
->mem_index
, idx
| MO_LE
);
417 static inline void gen_op_st_rm_T0_A0(DisasContext
*s
, int idx
, int d
)
420 gen_op_st_v(s
, idx
, cpu_T0
, cpu_A0
);
422 gen_op_mov_reg_v(idx
, d
, cpu_T0
);
426 static inline void gen_jmp_im(target_ulong pc
)
428 tcg_gen_movi_tl(cpu_tmp0
, pc
);
429 gen_op_jmp_v(cpu_tmp0
);
432 /* Compute SEG:REG into A0. SEG is selected from the override segment
433 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
434 indicate no override. */
435 static void gen_lea_v_seg(DisasContext
*s
, TCGMemOp aflag
, TCGv a0
,
436 int def_seg
, int ovr_seg
)
442 tcg_gen_mov_tl(cpu_A0
, a0
);
453 tcg_gen_ext32u_tl(cpu_A0
, a0
);
463 tcg_gen_ext16u_tl(cpu_A0
, a0
);
464 /* ADDSEG will only be false in 16-bit mode for LEA. */
475 TCGv seg
= cpu_seg_base
[ovr_seg
];
477 if (aflag
== MO_64
) {
478 tcg_gen_add_tl(cpu_A0
, a0
, seg
);
479 } else if (CODE64(s
)) {
480 tcg_gen_ext32u_tl(cpu_A0
, a0
);
481 tcg_gen_add_tl(cpu_A0
, cpu_A0
, seg
);
483 tcg_gen_add_tl(cpu_A0
, a0
, seg
);
484 tcg_gen_ext32u_tl(cpu_A0
, cpu_A0
);
489 static inline void gen_string_movl_A0_ESI(DisasContext
*s
)
491 gen_lea_v_seg(s
, s
->aflag
, cpu_regs
[R_ESI
], R_DS
, s
->override
);
494 static inline void gen_string_movl_A0_EDI(DisasContext
*s
)
496 gen_lea_v_seg(s
, s
->aflag
, cpu_regs
[R_EDI
], R_ES
, -1);
499 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot
)
501 tcg_gen_ld32s_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
, df
));
502 tcg_gen_shli_tl(cpu_T0
, cpu_T0
, ot
);
505 static TCGv
gen_ext_tl(TCGv dst
, TCGv src
, TCGMemOp size
, bool sign
)
510 tcg_gen_ext8s_tl(dst
, src
);
512 tcg_gen_ext8u_tl(dst
, src
);
517 tcg_gen_ext16s_tl(dst
, src
);
519 tcg_gen_ext16u_tl(dst
, src
);
525 tcg_gen_ext32s_tl(dst
, src
);
527 tcg_gen_ext32u_tl(dst
, src
);
536 static void gen_extu(TCGMemOp ot
, TCGv reg
)
538 gen_ext_tl(reg
, reg
, ot
, false);
541 static void gen_exts(TCGMemOp ot
, TCGv reg
)
543 gen_ext_tl(reg
, reg
, ot
, true);
546 static inline void gen_op_jnz_ecx(TCGMemOp size
, TCGLabel
*label1
)
548 tcg_gen_mov_tl(cpu_tmp0
, cpu_regs
[R_ECX
]);
549 gen_extu(size
, cpu_tmp0
);
550 tcg_gen_brcondi_tl(TCG_COND_NE
, cpu_tmp0
, 0, label1
);
553 static inline void gen_op_jz_ecx(TCGMemOp size
, TCGLabel
*label1
)
555 tcg_gen_mov_tl(cpu_tmp0
, cpu_regs
[R_ECX
]);
556 gen_extu(size
, cpu_tmp0
);
557 tcg_gen_brcondi_tl(TCG_COND_EQ
, cpu_tmp0
, 0, label1
);
560 static void gen_helper_in_func(TCGMemOp ot
, TCGv v
, TCGv_i32 n
)
564 gen_helper_inb(v
, cpu_env
, n
);
567 gen_helper_inw(v
, cpu_env
, n
);
570 gen_helper_inl(v
, cpu_env
, n
);
577 static void gen_helper_out_func(TCGMemOp ot
, TCGv_i32 v
, TCGv_i32 n
)
581 gen_helper_outb(cpu_env
, v
, n
);
584 gen_helper_outw(cpu_env
, v
, n
);
587 gen_helper_outl(cpu_env
, v
, n
);
594 static void gen_check_io(DisasContext
*s
, TCGMemOp ot
, target_ulong cur_eip
,
597 target_ulong next_eip
;
599 if (s
->pe
&& (s
->cpl
> s
->iopl
|| s
->vm86
)) {
600 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
603 gen_helper_check_iob(cpu_env
, cpu_tmp2_i32
);
606 gen_helper_check_iow(cpu_env
, cpu_tmp2_i32
);
609 gen_helper_check_iol(cpu_env
, cpu_tmp2_i32
);
615 if(s
->flags
& HF_SVMI_MASK
) {
618 svm_flags
|= (1 << (4 + ot
));
619 next_eip
= s
->pc
- s
->cs_base
;
620 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
621 gen_helper_svm_check_io(cpu_env
, cpu_tmp2_i32
,
622 tcg_const_i32(svm_flags
),
623 tcg_const_i32(next_eip
- cur_eip
));
627 static inline void gen_movs(DisasContext
*s
, TCGMemOp ot
)
629 gen_string_movl_A0_ESI(s
);
630 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
631 gen_string_movl_A0_EDI(s
);
632 gen_op_st_v(s
, ot
, cpu_T0
, cpu_A0
);
633 gen_op_movl_T0_Dshift(ot
);
634 gen_op_add_reg_T0(s
->aflag
, R_ESI
);
635 gen_op_add_reg_T0(s
->aflag
, R_EDI
);
638 static void gen_op_update1_cc(void)
640 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
643 static void gen_op_update2_cc(void)
645 tcg_gen_mov_tl(cpu_cc_src
, cpu_T1
);
646 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
649 static void gen_op_update3_cc(TCGv reg
)
651 tcg_gen_mov_tl(cpu_cc_src2
, reg
);
652 tcg_gen_mov_tl(cpu_cc_src
, cpu_T1
);
653 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
656 static inline void gen_op_testl_T0_T1_cc(void)
658 tcg_gen_and_tl(cpu_cc_dst
, cpu_T0
, cpu_T1
);
661 static void gen_op_update_neg_cc(void)
663 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
664 tcg_gen_neg_tl(cpu_cc_src
, cpu_T0
);
665 tcg_gen_movi_tl(cpu_cc_srcT
, 0);
668 /* compute all eflags to cc_src */
669 static void gen_compute_eflags(DisasContext
*s
)
671 TCGv zero
, dst
, src1
, src2
;
674 if (s
->cc_op
== CC_OP_EFLAGS
) {
677 if (s
->cc_op
== CC_OP_CLR
) {
678 tcg_gen_movi_tl(cpu_cc_src
, CC_Z
| CC_P
);
679 set_cc_op(s
, CC_OP_EFLAGS
);
688 /* Take care to not read values that are not live. */
689 live
= cc_op_live
[s
->cc_op
] & ~USES_CC_SRCT
;
690 dead
= live
^ (USES_CC_DST
| USES_CC_SRC
| USES_CC_SRC2
);
692 zero
= tcg_const_tl(0);
693 if (dead
& USES_CC_DST
) {
696 if (dead
& USES_CC_SRC
) {
699 if (dead
& USES_CC_SRC2
) {
705 gen_helper_cc_compute_all(cpu_cc_src
, dst
, src1
, src2
, cpu_cc_op
);
706 set_cc_op(s
, CC_OP_EFLAGS
);
713 typedef struct CCPrepare
{
723 /* compute eflags.C to reg */
724 static CCPrepare
gen_prepare_eflags_c(DisasContext
*s
, TCGv reg
)
730 case CC_OP_SUBB
... CC_OP_SUBQ
:
731 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
732 size
= s
->cc_op
- CC_OP_SUBB
;
733 t1
= gen_ext_tl(cpu_tmp0
, cpu_cc_src
, size
, false);
734 /* If no temporary was used, be careful not to alias t1 and t0. */
735 t0
= TCGV_EQUAL(t1
, cpu_cc_src
) ? cpu_tmp0
: reg
;
736 tcg_gen_mov_tl(t0
, cpu_cc_srcT
);
740 case CC_OP_ADDB
... CC_OP_ADDQ
:
741 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
742 size
= s
->cc_op
- CC_OP_ADDB
;
743 t1
= gen_ext_tl(cpu_tmp0
, cpu_cc_src
, size
, false);
744 t0
= gen_ext_tl(reg
, cpu_cc_dst
, size
, false);
746 return (CCPrepare
) { .cond
= TCG_COND_LTU
, .reg
= t0
,
747 .reg2
= t1
, .mask
= -1, .use_reg2
= true };
749 case CC_OP_LOGICB
... CC_OP_LOGICQ
:
751 return (CCPrepare
) { .cond
= TCG_COND_NEVER
, .mask
= -1 };
753 case CC_OP_INCB
... CC_OP_INCQ
:
754 case CC_OP_DECB
... CC_OP_DECQ
:
755 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
756 .mask
= -1, .no_setcond
= true };
758 case CC_OP_SHLB
... CC_OP_SHLQ
:
759 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
760 size
= s
->cc_op
- CC_OP_SHLB
;
761 shift
= (8 << size
) - 1;
762 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
763 .mask
= (target_ulong
)1 << shift
};
765 case CC_OP_MULB
... CC_OP_MULQ
:
766 return (CCPrepare
) { .cond
= TCG_COND_NE
,
767 .reg
= cpu_cc_src
, .mask
= -1 };
769 case CC_OP_BMILGB
... CC_OP_BMILGQ
:
770 size
= s
->cc_op
- CC_OP_BMILGB
;
771 t0
= gen_ext_tl(reg
, cpu_cc_src
, size
, false);
772 return (CCPrepare
) { .cond
= TCG_COND_EQ
, .reg
= t0
, .mask
= -1 };
776 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_dst
,
777 .mask
= -1, .no_setcond
= true };
780 case CC_OP_SARB
... CC_OP_SARQ
:
782 return (CCPrepare
) { .cond
= TCG_COND_NE
,
783 .reg
= cpu_cc_src
, .mask
= CC_C
};
786 /* The need to compute only C from CC_OP_DYNAMIC is important
787 in efficiently implementing e.g. INC at the start of a TB. */
789 gen_helper_cc_compute_c(reg
, cpu_cc_dst
, cpu_cc_src
,
790 cpu_cc_src2
, cpu_cc_op
);
791 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= reg
,
792 .mask
= -1, .no_setcond
= true };
796 /* compute eflags.P to reg */
797 static CCPrepare
gen_prepare_eflags_p(DisasContext
*s
, TCGv reg
)
799 gen_compute_eflags(s
);
800 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
804 /* compute eflags.S to reg */
805 static CCPrepare
gen_prepare_eflags_s(DisasContext
*s
, TCGv reg
)
809 gen_compute_eflags(s
);
815 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
818 return (CCPrepare
) { .cond
= TCG_COND_NEVER
, .mask
= -1 };
821 TCGMemOp size
= (s
->cc_op
- CC_OP_ADDB
) & 3;
822 TCGv t0
= gen_ext_tl(reg
, cpu_cc_dst
, size
, true);
823 return (CCPrepare
) { .cond
= TCG_COND_LT
, .reg
= t0
, .mask
= -1 };
828 /* compute eflags.O to reg */
829 static CCPrepare
gen_prepare_eflags_o(DisasContext
*s
, TCGv reg
)
834 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src2
,
835 .mask
= -1, .no_setcond
= true };
837 return (CCPrepare
) { .cond
= TCG_COND_NEVER
, .mask
= -1 };
839 gen_compute_eflags(s
);
840 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
845 /* compute eflags.Z to reg */
846 static CCPrepare
gen_prepare_eflags_z(DisasContext
*s
, TCGv reg
)
850 gen_compute_eflags(s
);
856 return (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
859 return (CCPrepare
) { .cond
= TCG_COND_ALWAYS
, .mask
= -1 };
862 TCGMemOp size
= (s
->cc_op
- CC_OP_ADDB
) & 3;
863 TCGv t0
= gen_ext_tl(reg
, cpu_cc_dst
, size
, false);
864 return (CCPrepare
) { .cond
= TCG_COND_EQ
, .reg
= t0
, .mask
= -1 };
869 /* perform a conditional store into register 'reg' according to jump opcode
870 value 'b'. In the fast case, T0 is guaranteed not to be used. */
871 static CCPrepare
gen_prepare_cc(DisasContext
*s
, int b
, TCGv reg
)
873 int inv
, jcc_op
, cond
;
879 jcc_op
= (b
>> 1) & 7;
882 case CC_OP_SUBB
... CC_OP_SUBQ
:
883 /* We optimize relational operators for the cmp/jcc case. */
884 size
= s
->cc_op
- CC_OP_SUBB
;
887 tcg_gen_mov_tl(cpu_tmp4
, cpu_cc_srcT
);
888 gen_extu(size
, cpu_tmp4
);
889 t0
= gen_ext_tl(cpu_tmp0
, cpu_cc_src
, size
, false);
890 cc
= (CCPrepare
) { .cond
= TCG_COND_LEU
, .reg
= cpu_tmp4
,
891 .reg2
= t0
, .mask
= -1, .use_reg2
= true };
900 tcg_gen_mov_tl(cpu_tmp4
, cpu_cc_srcT
);
901 gen_exts(size
, cpu_tmp4
);
902 t0
= gen_ext_tl(cpu_tmp0
, cpu_cc_src
, size
, true);
903 cc
= (CCPrepare
) { .cond
= cond
, .reg
= cpu_tmp4
,
904 .reg2
= t0
, .mask
= -1, .use_reg2
= true };
914 /* This actually generates good code for JC, JZ and JS. */
917 cc
= gen_prepare_eflags_o(s
, reg
);
920 cc
= gen_prepare_eflags_c(s
, reg
);
923 cc
= gen_prepare_eflags_z(s
, reg
);
926 gen_compute_eflags(s
);
927 cc
= (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= cpu_cc_src
,
928 .mask
= CC_Z
| CC_C
};
931 cc
= gen_prepare_eflags_s(s
, reg
);
934 cc
= gen_prepare_eflags_p(s
, reg
);
937 gen_compute_eflags(s
);
938 if (TCGV_EQUAL(reg
, cpu_cc_src
)) {
941 tcg_gen_shri_tl(reg
, cpu_cc_src
, 4); /* CC_O -> CC_S */
942 tcg_gen_xor_tl(reg
, reg
, cpu_cc_src
);
943 cc
= (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= reg
,
948 gen_compute_eflags(s
);
949 if (TCGV_EQUAL(reg
, cpu_cc_src
)) {
952 tcg_gen_shri_tl(reg
, cpu_cc_src
, 4); /* CC_O -> CC_S */
953 tcg_gen_xor_tl(reg
, reg
, cpu_cc_src
);
954 cc
= (CCPrepare
) { .cond
= TCG_COND_NE
, .reg
= reg
,
955 .mask
= CC_S
| CC_Z
};
962 cc
.cond
= tcg_invert_cond(cc
.cond
);
967 static void gen_setcc1(DisasContext
*s
, int b
, TCGv reg
)
969 CCPrepare cc
= gen_prepare_cc(s
, b
, reg
);
972 if (cc
.cond
== TCG_COND_EQ
) {
973 tcg_gen_xori_tl(reg
, cc
.reg
, 1);
975 tcg_gen_mov_tl(reg
, cc
.reg
);
980 if (cc
.cond
== TCG_COND_NE
&& !cc
.use_reg2
&& cc
.imm
== 0 &&
981 cc
.mask
!= 0 && (cc
.mask
& (cc
.mask
- 1)) == 0) {
982 tcg_gen_shri_tl(reg
, cc
.reg
, ctztl(cc
.mask
));
983 tcg_gen_andi_tl(reg
, reg
, 1);
987 tcg_gen_andi_tl(reg
, cc
.reg
, cc
.mask
);
991 tcg_gen_setcond_tl(cc
.cond
, reg
, cc
.reg
, cc
.reg2
);
993 tcg_gen_setcondi_tl(cc
.cond
, reg
, cc
.reg
, cc
.imm
);
997 static inline void gen_compute_eflags_c(DisasContext
*s
, TCGv reg
)
999 gen_setcc1(s
, JCC_B
<< 1, reg
);
1002 /* generate a conditional jump to label 'l1' according to jump opcode
1003 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1004 static inline void gen_jcc1_noeob(DisasContext
*s
, int b
, TCGLabel
*l1
)
1006 CCPrepare cc
= gen_prepare_cc(s
, b
, cpu_T0
);
1008 if (cc
.mask
!= -1) {
1009 tcg_gen_andi_tl(cpu_T0
, cc
.reg
, cc
.mask
);
1013 tcg_gen_brcond_tl(cc
.cond
, cc
.reg
, cc
.reg2
, l1
);
1015 tcg_gen_brcondi_tl(cc
.cond
, cc
.reg
, cc
.imm
, l1
);
1019 /* Generate a conditional jump to label 'l1' according to jump opcode
1020 value 'b'. In the fast case, T0 is guaranteed not to be used.
1021 A translation block must end soon. */
1022 static inline void gen_jcc1(DisasContext
*s
, int b
, TCGLabel
*l1
)
1024 CCPrepare cc
= gen_prepare_cc(s
, b
, cpu_T0
);
1026 gen_update_cc_op(s
);
1027 if (cc
.mask
!= -1) {
1028 tcg_gen_andi_tl(cpu_T0
, cc
.reg
, cc
.mask
);
1031 set_cc_op(s
, CC_OP_DYNAMIC
);
1033 tcg_gen_brcond_tl(cc
.cond
, cc
.reg
, cc
.reg2
, l1
);
1035 tcg_gen_brcondi_tl(cc
.cond
, cc
.reg
, cc
.imm
, l1
);
1039 /* XXX: does not work with gdbstub "ice" single step - not a
1041 static TCGLabel
*gen_jz_ecx_string(DisasContext
*s
, target_ulong next_eip
)
1043 TCGLabel
*l1
= gen_new_label();
1044 TCGLabel
*l2
= gen_new_label();
1045 gen_op_jnz_ecx(s
->aflag
, l1
);
1047 gen_jmp_tb(s
, next_eip
, 1);
1052 static inline void gen_stos(DisasContext
*s
, TCGMemOp ot
)
1054 gen_op_mov_v_reg(MO_32
, cpu_T0
, R_EAX
);
1055 gen_string_movl_A0_EDI(s
);
1056 gen_op_st_v(s
, ot
, cpu_T0
, cpu_A0
);
1057 gen_op_movl_T0_Dshift(ot
);
1058 gen_op_add_reg_T0(s
->aflag
, R_EDI
);
1061 static inline void gen_lods(DisasContext
*s
, TCGMemOp ot
)
1063 gen_string_movl_A0_ESI(s
);
1064 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1065 gen_op_mov_reg_v(ot
, R_EAX
, cpu_T0
);
1066 gen_op_movl_T0_Dshift(ot
);
1067 gen_op_add_reg_T0(s
->aflag
, R_ESI
);
1070 static inline void gen_scas(DisasContext
*s
, TCGMemOp ot
)
1072 gen_string_movl_A0_EDI(s
);
1073 gen_op_ld_v(s
, ot
, cpu_T1
, cpu_A0
);
1074 gen_op(s
, OP_CMPL
, ot
, R_EAX
);
1075 gen_op_movl_T0_Dshift(ot
);
1076 gen_op_add_reg_T0(s
->aflag
, R_EDI
);
1079 static inline void gen_cmps(DisasContext
*s
, TCGMemOp ot
)
1081 gen_string_movl_A0_EDI(s
);
1082 gen_op_ld_v(s
, ot
, cpu_T1
, cpu_A0
);
1083 gen_string_movl_A0_ESI(s
);
1084 gen_op(s
, OP_CMPL
, ot
, OR_TMP0
);
1085 gen_op_movl_T0_Dshift(ot
);
1086 gen_op_add_reg_T0(s
->aflag
, R_ESI
);
1087 gen_op_add_reg_T0(s
->aflag
, R_EDI
);
1090 static void gen_bpt_io(DisasContext
*s
, TCGv_i32 t_port
, int ot
)
1092 if (s
->flags
& HF_IOBPT_MASK
) {
1093 TCGv_i32 t_size
= tcg_const_i32(1 << ot
);
1094 TCGv t_next
= tcg_const_tl(s
->pc
- s
->cs_base
);
1096 gen_helper_bpt_io(cpu_env
, t_port
, t_size
, t_next
);
1097 tcg_temp_free_i32(t_size
);
1098 tcg_temp_free(t_next
);
1103 static inline void gen_ins(DisasContext
*s
, TCGMemOp ot
)
1105 if (s
->tb
->cflags
& CF_USE_ICOUNT
) {
1108 gen_string_movl_A0_EDI(s
);
1109 /* Note: we must do this dummy write first to be restartable in
1110 case of page fault. */
1111 tcg_gen_movi_tl(cpu_T0
, 0);
1112 gen_op_st_v(s
, ot
, cpu_T0
, cpu_A0
);
1113 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_regs
[R_EDX
]);
1114 tcg_gen_andi_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, 0xffff);
1115 gen_helper_in_func(ot
, cpu_T0
, cpu_tmp2_i32
);
1116 gen_op_st_v(s
, ot
, cpu_T0
, cpu_A0
);
1117 gen_op_movl_T0_Dshift(ot
);
1118 gen_op_add_reg_T0(s
->aflag
, R_EDI
);
1119 gen_bpt_io(s
, cpu_tmp2_i32
, ot
);
1120 if (s
->tb
->cflags
& CF_USE_ICOUNT
) {
1125 static inline void gen_outs(DisasContext
*s
, TCGMemOp ot
)
1127 if (s
->tb
->cflags
& CF_USE_ICOUNT
) {
1130 gen_string_movl_A0_ESI(s
);
1131 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1133 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_regs
[R_EDX
]);
1134 tcg_gen_andi_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, 0xffff);
1135 tcg_gen_trunc_tl_i32(cpu_tmp3_i32
, cpu_T0
);
1136 gen_helper_out_func(ot
, cpu_tmp2_i32
, cpu_tmp3_i32
);
1137 gen_op_movl_T0_Dshift(ot
);
1138 gen_op_add_reg_T0(s
->aflag
, R_ESI
);
1139 gen_bpt_io(s
, cpu_tmp2_i32
, ot
);
1140 if (s
->tb
->cflags
& CF_USE_ICOUNT
) {
1145 /* same method as Valgrind : we generate jumps to current or next
1147 #define GEN_REPZ(op) \
1148 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1149 target_ulong cur_eip, target_ulong next_eip) \
1152 gen_update_cc_op(s); \
1153 l2 = gen_jz_ecx_string(s, next_eip); \
1154 gen_ ## op(s, ot); \
1155 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1156 /* a loop would cause two single step exceptions if ECX = 1 \
1157 before rep string_insn */ \
1159 gen_op_jz_ecx(s->aflag, l2); \
1160 gen_jmp(s, cur_eip); \
1163 #define GEN_REPZ2(op) \
1164 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1165 target_ulong cur_eip, \
1166 target_ulong next_eip, \
1170 gen_update_cc_op(s); \
1171 l2 = gen_jz_ecx_string(s, next_eip); \
1172 gen_ ## op(s, ot); \
1173 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1174 gen_update_cc_op(s); \
1175 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1177 gen_op_jz_ecx(s->aflag, l2); \
1178 gen_jmp(s, cur_eip); \
1189 static void gen_helper_fp_arith_ST0_FT0(int op
)
1193 gen_helper_fadd_ST0_FT0(cpu_env
);
1196 gen_helper_fmul_ST0_FT0(cpu_env
);
1199 gen_helper_fcom_ST0_FT0(cpu_env
);
1202 gen_helper_fcom_ST0_FT0(cpu_env
);
1205 gen_helper_fsub_ST0_FT0(cpu_env
);
1208 gen_helper_fsubr_ST0_FT0(cpu_env
);
1211 gen_helper_fdiv_ST0_FT0(cpu_env
);
1214 gen_helper_fdivr_ST0_FT0(cpu_env
);
1219 /* NOTE the exception in "r" op ordering */
1220 static void gen_helper_fp_arith_STN_ST0(int op
, int opreg
)
1222 TCGv_i32 tmp
= tcg_const_i32(opreg
);
1225 gen_helper_fadd_STN_ST0(cpu_env
, tmp
);
1228 gen_helper_fmul_STN_ST0(cpu_env
, tmp
);
1231 gen_helper_fsubr_STN_ST0(cpu_env
, tmp
);
1234 gen_helper_fsub_STN_ST0(cpu_env
, tmp
);
1237 gen_helper_fdivr_STN_ST0(cpu_env
, tmp
);
1240 gen_helper_fdiv_STN_ST0(cpu_env
, tmp
);
1245 /* if d == OR_TMP0, it means memory operand (address in A0) */
1246 static void gen_op(DisasContext
*s1
, int op
, TCGMemOp ot
, int d
)
1249 gen_op_mov_v_reg(ot
, cpu_T0
, d
);
1251 gen_op_ld_v(s1
, ot
, cpu_T0
, cpu_A0
);
1255 gen_compute_eflags_c(s1
, cpu_tmp4
);
1256 tcg_gen_add_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1257 tcg_gen_add_tl(cpu_T0
, cpu_T0
, cpu_tmp4
);
1258 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1259 gen_op_update3_cc(cpu_tmp4
);
1260 set_cc_op(s1
, CC_OP_ADCB
+ ot
);
1263 gen_compute_eflags_c(s1
, cpu_tmp4
);
1264 tcg_gen_sub_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1265 tcg_gen_sub_tl(cpu_T0
, cpu_T0
, cpu_tmp4
);
1266 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1267 gen_op_update3_cc(cpu_tmp4
);
1268 set_cc_op(s1
, CC_OP_SBBB
+ ot
);
1271 tcg_gen_add_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1272 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1273 gen_op_update2_cc();
1274 set_cc_op(s1
, CC_OP_ADDB
+ ot
);
1277 tcg_gen_mov_tl(cpu_cc_srcT
, cpu_T0
);
1278 tcg_gen_sub_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1279 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1280 gen_op_update2_cc();
1281 set_cc_op(s1
, CC_OP_SUBB
+ ot
);
1285 tcg_gen_and_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1286 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1287 gen_op_update1_cc();
1288 set_cc_op(s1
, CC_OP_LOGICB
+ ot
);
1291 tcg_gen_or_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1292 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1293 gen_op_update1_cc();
1294 set_cc_op(s1
, CC_OP_LOGICB
+ ot
);
1297 tcg_gen_xor_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1298 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1299 gen_op_update1_cc();
1300 set_cc_op(s1
, CC_OP_LOGICB
+ ot
);
1303 tcg_gen_mov_tl(cpu_cc_src
, cpu_T1
);
1304 tcg_gen_mov_tl(cpu_cc_srcT
, cpu_T0
);
1305 tcg_gen_sub_tl(cpu_cc_dst
, cpu_T0
, cpu_T1
);
1306 set_cc_op(s1
, CC_OP_SUBB
+ ot
);
1311 /* if d == OR_TMP0, it means memory operand (address in A0) */
1312 static void gen_inc(DisasContext
*s1
, TCGMemOp ot
, int d
, int c
)
1315 gen_op_mov_v_reg(ot
, cpu_T0
, d
);
1317 gen_op_ld_v(s1
, ot
, cpu_T0
, cpu_A0
);
1319 gen_compute_eflags_c(s1
, cpu_cc_src
);
1321 tcg_gen_addi_tl(cpu_T0
, cpu_T0
, 1);
1322 set_cc_op(s1
, CC_OP_INCB
+ ot
);
1324 tcg_gen_addi_tl(cpu_T0
, cpu_T0
, -1);
1325 set_cc_op(s1
, CC_OP_DECB
+ ot
);
1327 gen_op_st_rm_T0_A0(s1
, ot
, d
);
1328 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
1331 static void gen_shift_flags(DisasContext
*s
, TCGMemOp ot
, TCGv result
,
1332 TCGv shm1
, TCGv count
, bool is_right
)
1334 TCGv_i32 z32
, s32
, oldop
;
1337 /* Store the results into the CC variables. If we know that the
1338 variable must be dead, store unconditionally. Otherwise we'll
1339 need to not disrupt the current contents. */
1340 z_tl
= tcg_const_tl(0);
1341 if (cc_op_live
[s
->cc_op
] & USES_CC_DST
) {
1342 tcg_gen_movcond_tl(TCG_COND_NE
, cpu_cc_dst
, count
, z_tl
,
1343 result
, cpu_cc_dst
);
1345 tcg_gen_mov_tl(cpu_cc_dst
, result
);
1347 if (cc_op_live
[s
->cc_op
] & USES_CC_SRC
) {
1348 tcg_gen_movcond_tl(TCG_COND_NE
, cpu_cc_src
, count
, z_tl
,
1351 tcg_gen_mov_tl(cpu_cc_src
, shm1
);
1353 tcg_temp_free(z_tl
);
1355 /* Get the two potential CC_OP values into temporaries. */
1356 tcg_gen_movi_i32(cpu_tmp2_i32
, (is_right ? CC_OP_SARB
: CC_OP_SHLB
) + ot
);
1357 if (s
->cc_op
== CC_OP_DYNAMIC
) {
1360 tcg_gen_movi_i32(cpu_tmp3_i32
, s
->cc_op
);
1361 oldop
= cpu_tmp3_i32
;
1364 /* Conditionally store the CC_OP value. */
1365 z32
= tcg_const_i32(0);
1366 s32
= tcg_temp_new_i32();
1367 tcg_gen_trunc_tl_i32(s32
, count
);
1368 tcg_gen_movcond_i32(TCG_COND_NE
, cpu_cc_op
, s32
, z32
, cpu_tmp2_i32
, oldop
);
1369 tcg_temp_free_i32(z32
);
1370 tcg_temp_free_i32(s32
);
1372 /* The CC_OP value is no longer predictable. */
1373 set_cc_op(s
, CC_OP_DYNAMIC
);
1376 static void gen_shift_rm_T1(DisasContext
*s
, TCGMemOp ot
, int op1
,
1377 int is_right
, int is_arith
)
1379 target_ulong mask
= (ot
== MO_64 ?
0x3f : 0x1f);
1382 if (op1
== OR_TMP0
) {
1383 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1385 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1388 tcg_gen_andi_tl(cpu_T1
, cpu_T1
, mask
);
1389 tcg_gen_subi_tl(cpu_tmp0
, cpu_T1
, 1);
1393 gen_exts(ot
, cpu_T0
);
1394 tcg_gen_sar_tl(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1395 tcg_gen_sar_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1397 gen_extu(ot
, cpu_T0
);
1398 tcg_gen_shr_tl(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1399 tcg_gen_shr_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1402 tcg_gen_shl_tl(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1403 tcg_gen_shl_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1407 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1409 gen_shift_flags(s
, ot
, cpu_T0
, cpu_tmp0
, cpu_T1
, is_right
);
1412 static void gen_shift_rm_im(DisasContext
*s
, TCGMemOp ot
, int op1
, int op2
,
1413 int is_right
, int is_arith
)
1415 int mask
= (ot
== MO_64 ?
0x3f : 0x1f);
1419 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1421 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1427 gen_exts(ot
, cpu_T0
);
1428 tcg_gen_sari_tl(cpu_tmp4
, cpu_T0
, op2
- 1);
1429 tcg_gen_sari_tl(cpu_T0
, cpu_T0
, op2
);
1431 gen_extu(ot
, cpu_T0
);
1432 tcg_gen_shri_tl(cpu_tmp4
, cpu_T0
, op2
- 1);
1433 tcg_gen_shri_tl(cpu_T0
, cpu_T0
, op2
);
1436 tcg_gen_shli_tl(cpu_tmp4
, cpu_T0
, op2
- 1);
1437 tcg_gen_shli_tl(cpu_T0
, cpu_T0
, op2
);
1442 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1444 /* update eflags if non zero shift */
1446 tcg_gen_mov_tl(cpu_cc_src
, cpu_tmp4
);
1447 tcg_gen_mov_tl(cpu_cc_dst
, cpu_T0
);
1448 set_cc_op(s
, (is_right ? CC_OP_SARB
: CC_OP_SHLB
) + ot
);
1452 static void gen_rot_rm_T1(DisasContext
*s
, TCGMemOp ot
, int op1
, int is_right
)
1454 target_ulong mask
= (ot
== MO_64 ?
0x3f : 0x1f);
1458 if (op1
== OR_TMP0
) {
1459 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1461 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1464 tcg_gen_andi_tl(cpu_T1
, cpu_T1
, mask
);
1468 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1469 tcg_gen_ext8u_tl(cpu_T0
, cpu_T0
);
1470 tcg_gen_muli_tl(cpu_T0
, cpu_T0
, 0x01010101);
1473 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1474 tcg_gen_deposit_tl(cpu_T0
, cpu_T0
, cpu_T0
, 16, 16);
1477 #ifdef TARGET_X86_64
1479 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
1480 tcg_gen_trunc_tl_i32(cpu_tmp3_i32
, cpu_T1
);
1482 tcg_gen_rotr_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, cpu_tmp3_i32
);
1484 tcg_gen_rotl_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, cpu_tmp3_i32
);
1486 tcg_gen_extu_i32_tl(cpu_T0
, cpu_tmp2_i32
);
1491 tcg_gen_rotr_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1493 tcg_gen_rotl_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1499 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1501 /* We'll need the flags computed into CC_SRC. */
1502 gen_compute_eflags(s
);
1504 /* The value that was "rotated out" is now present at the other end
1505 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1506 since we've computed the flags into CC_SRC, these variables are
1509 tcg_gen_shri_tl(cpu_cc_src2
, cpu_T0
, mask
- 1);
1510 tcg_gen_shri_tl(cpu_cc_dst
, cpu_T0
, mask
);
1511 tcg_gen_andi_tl(cpu_cc_dst
, cpu_cc_dst
, 1);
1513 tcg_gen_shri_tl(cpu_cc_src2
, cpu_T0
, mask
);
1514 tcg_gen_andi_tl(cpu_cc_dst
, cpu_T0
, 1);
1516 tcg_gen_andi_tl(cpu_cc_src2
, cpu_cc_src2
, 1);
1517 tcg_gen_xor_tl(cpu_cc_src2
, cpu_cc_src2
, cpu_cc_dst
);
1519 /* Now conditionally store the new CC_OP value. If the shift count
1520 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1521 Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1522 exactly as we computed above. */
1523 t0
= tcg_const_i32(0);
1524 t1
= tcg_temp_new_i32();
1525 tcg_gen_trunc_tl_i32(t1
, cpu_T1
);
1526 tcg_gen_movi_i32(cpu_tmp2_i32
, CC_OP_ADCOX
);
1527 tcg_gen_movi_i32(cpu_tmp3_i32
, CC_OP_EFLAGS
);
1528 tcg_gen_movcond_i32(TCG_COND_NE
, cpu_cc_op
, t1
, t0
,
1529 cpu_tmp2_i32
, cpu_tmp3_i32
);
1530 tcg_temp_free_i32(t0
);
1531 tcg_temp_free_i32(t1
);
1533 /* The CC_OP value is no longer predictable. */
1534 set_cc_op(s
, CC_OP_DYNAMIC
);
1537 static void gen_rot_rm_im(DisasContext
*s
, TCGMemOp ot
, int op1
, int op2
,
1540 int mask
= (ot
== MO_64 ?
0x3f : 0x1f);
1544 if (op1
== OR_TMP0
) {
1545 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1547 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1553 #ifdef TARGET_X86_64
1555 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
1557 tcg_gen_rotri_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, op2
);
1559 tcg_gen_rotli_i32(cpu_tmp2_i32
, cpu_tmp2_i32
, op2
);
1561 tcg_gen_extu_i32_tl(cpu_T0
, cpu_tmp2_i32
);
1566 tcg_gen_rotri_tl(cpu_T0
, cpu_T0
, op2
);
1568 tcg_gen_rotli_tl(cpu_T0
, cpu_T0
, op2
);
1579 shift
= mask
+ 1 - shift
;
1581 gen_extu(ot
, cpu_T0
);
1582 tcg_gen_shli_tl(cpu_tmp0
, cpu_T0
, shift
);
1583 tcg_gen_shri_tl(cpu_T0
, cpu_T0
, mask
+ 1 - shift
);
1584 tcg_gen_or_tl(cpu_T0
, cpu_T0
, cpu_tmp0
);
1590 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1593 /* Compute the flags into CC_SRC. */
1594 gen_compute_eflags(s
);
1596 /* The value that was "rotated out" is now present at the other end
1597 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1598 since we've computed the flags into CC_SRC, these variables are
1601 tcg_gen_shri_tl(cpu_cc_src2
, cpu_T0
, mask
- 1);
1602 tcg_gen_shri_tl(cpu_cc_dst
, cpu_T0
, mask
);
1603 tcg_gen_andi_tl(cpu_cc_dst
, cpu_cc_dst
, 1);
1605 tcg_gen_shri_tl(cpu_cc_src2
, cpu_T0
, mask
);
1606 tcg_gen_andi_tl(cpu_cc_dst
, cpu_T0
, 1);
1608 tcg_gen_andi_tl(cpu_cc_src2
, cpu_cc_src2
, 1);
1609 tcg_gen_xor_tl(cpu_cc_src2
, cpu_cc_src2
, cpu_cc_dst
);
1610 set_cc_op(s
, CC_OP_ADCOX
);
1614 /* XXX: add faster immediate = 1 case */
1615 static void gen_rotc_rm_T1(DisasContext
*s
, TCGMemOp ot
, int op1
,
1618 gen_compute_eflags(s
);
1619 assert(s
->cc_op
== CC_OP_EFLAGS
);
1623 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1625 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1630 gen_helper_rcrb(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1633 gen_helper_rcrw(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1636 gen_helper_rcrl(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1638 #ifdef TARGET_X86_64
1640 gen_helper_rcrq(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1649 gen_helper_rclb(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1652 gen_helper_rclw(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1655 gen_helper_rcll(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1657 #ifdef TARGET_X86_64
1659 gen_helper_rclq(cpu_T0
, cpu_env
, cpu_T0
, cpu_T1
);
1667 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1670 /* XXX: add faster immediate case */
1671 static void gen_shiftd_rm_T1(DisasContext
*s
, TCGMemOp ot
, int op1
,
1672 bool is_right
, TCGv count_in
)
1674 target_ulong mask
= (ot
== MO_64 ?
63 : 31);
1678 if (op1
== OR_TMP0
) {
1679 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
1681 gen_op_mov_v_reg(ot
, cpu_T0
, op1
);
1684 count
= tcg_temp_new();
1685 tcg_gen_andi_tl(count
, count_in
, mask
);
1689 /* Note: we implement the Intel behaviour for shift count > 16.
1690 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1691 portion by constructing it as a 32-bit value. */
1693 tcg_gen_deposit_tl(cpu_tmp0
, cpu_T0
, cpu_T1
, 16, 16);
1694 tcg_gen_mov_tl(cpu_T1
, cpu_T0
);
1695 tcg_gen_mov_tl(cpu_T0
, cpu_tmp0
);
1697 tcg_gen_deposit_tl(cpu_T1
, cpu_T0
, cpu_T1
, 16, 16);
1700 #ifdef TARGET_X86_64
1702 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1703 tcg_gen_subi_tl(cpu_tmp0
, count
, 1);
1705 tcg_gen_concat_tl_i64(cpu_T0
, cpu_T0
, cpu_T1
);
1706 tcg_gen_shr_i64(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1707 tcg_gen_shr_i64(cpu_T0
, cpu_T0
, count
);
1709 tcg_gen_concat_tl_i64(cpu_T0
, cpu_T1
, cpu_T0
);
1710 tcg_gen_shl_i64(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1711 tcg_gen_shl_i64(cpu_T0
, cpu_T0
, count
);
1712 tcg_gen_shri_i64(cpu_tmp0
, cpu_tmp0
, 32);
1713 tcg_gen_shri_i64(cpu_T0
, cpu_T0
, 32);
1718 tcg_gen_subi_tl(cpu_tmp0
, count
, 1);
1720 tcg_gen_shr_tl(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1722 tcg_gen_subfi_tl(cpu_tmp4
, mask
+ 1, count
);
1723 tcg_gen_shr_tl(cpu_T0
, cpu_T0
, count
);
1724 tcg_gen_shl_tl(cpu_T1
, cpu_T1
, cpu_tmp4
);
1726 tcg_gen_shl_tl(cpu_tmp0
, cpu_T0
, cpu_tmp0
);
1728 /* Only needed if count > 16, for Intel behaviour. */
1729 tcg_gen_subfi_tl(cpu_tmp4
, 33, count
);
1730 tcg_gen_shr_tl(cpu_tmp4
, cpu_T1
, cpu_tmp4
);
1731 tcg_gen_or_tl(cpu_tmp0
, cpu_tmp0
, cpu_tmp4
);
1734 tcg_gen_subfi_tl(cpu_tmp4
, mask
+ 1, count
);
1735 tcg_gen_shl_tl(cpu_T0
, cpu_T0
, count
);
1736 tcg_gen_shr_tl(cpu_T1
, cpu_T1
, cpu_tmp4
);
1738 tcg_gen_movi_tl(cpu_tmp4
, 0);
1739 tcg_gen_movcond_tl(TCG_COND_EQ
, cpu_T1
, count
, cpu_tmp4
,
1741 tcg_gen_or_tl(cpu_T0
, cpu_T0
, cpu_T1
);
1746 gen_op_st_rm_T0_A0(s
, ot
, op1
);
1748 gen_shift_flags(s
, ot
, cpu_T0
, cpu_tmp0
, count
, is_right
);
1749 tcg_temp_free(count
);
1752 static void gen_shift(DisasContext
*s1
, int op
, TCGMemOp ot
, int d
, int s
)
1755 gen_op_mov_v_reg(ot
, cpu_T1
, s
);
1758 gen_rot_rm_T1(s1
, ot
, d
, 0);
1761 gen_rot_rm_T1(s1
, ot
, d
, 1);
1765 gen_shift_rm_T1(s1
, ot
, d
, 0, 0);
1768 gen_shift_rm_T1(s1
, ot
, d
, 1, 0);
1771 gen_shift_rm_T1(s1
, ot
, d
, 1, 1);
1774 gen_rotc_rm_T1(s1
, ot
, d
, 0);
1777 gen_rotc_rm_T1(s1
, ot
, d
, 1);
1782 static void gen_shifti(DisasContext
*s1
, int op
, TCGMemOp ot
, int d
, int c
)
1786 gen_rot_rm_im(s1
, ot
, d
, c
, 0);
1789 gen_rot_rm_im(s1
, ot
, d
, c
, 1);
1793 gen_shift_rm_im(s1
, ot
, d
, c
, 0, 0);
1796 gen_shift_rm_im(s1
, ot
, d
, c
, 1, 0);
1799 gen_shift_rm_im(s1
, ot
, d
, c
, 1, 1);
1802 /* currently not optimized */
1803 tcg_gen_movi_tl(cpu_T1
, c
);
1804 gen_shift(s1
, op
, ot
, d
, OR_TMP1
);
1809 static void gen_lea_modrm(CPUX86State
*env
, DisasContext
*s
, int modrm
)
1812 int havesib
, base
, index
, scale
;
1813 int mod
, rm
, code
, def_seg
, ovr_seg
;
1817 ovr_seg
= s
->override
;
1818 mod
= (modrm
>> 6) & 3;
1831 code
= cpu_ldub_code(env
, s
->pc
++);
1832 scale
= (code
>> 6) & 3;
1833 index
= ((code
>> 3) & 7) | REX_X(s
);
1835 index
= -1; /* no index */
1843 if ((base
& 7) == 5) {
1845 disp
= (int32_t)cpu_ldl_code(env
, s
->pc
);
1847 if (CODE64(s
) && !havesib
) {
1848 disp
+= s
->pc
+ s
->rip_offset
;
1855 disp
= (int8_t)cpu_ldub_code(env
, s
->pc
++);
1859 disp
= (int32_t)cpu_ldl_code(env
, s
->pc
);
1864 /* For correct popl handling with esp. */
1865 if (base
== R_ESP
&& s
->popl_esp_hack
) {
1866 disp
+= s
->popl_esp_hack
;
1869 /* Compute the address, with a minimum number of TCG ops. */
1873 sum
= cpu_regs
[index
];
1875 tcg_gen_shli_tl(cpu_A0
, cpu_regs
[index
], scale
);
1879 tcg_gen_add_tl(cpu_A0
, sum
, cpu_regs
[base
]);
1882 } else if (base
>= 0) {
1883 sum
= cpu_regs
[base
];
1885 if (TCGV_IS_UNUSED(sum
)) {
1886 tcg_gen_movi_tl(cpu_A0
, disp
);
1888 } else if (disp
!= 0) {
1889 tcg_gen_addi_tl(cpu_A0
, sum
, disp
);
1893 if (base
== R_EBP
|| base
== R_ESP
) {
1902 disp
= cpu_lduw_code(env
, s
->pc
);
1904 tcg_gen_movi_tl(cpu_A0
, disp
);
1908 } else if (mod
== 1) {
1909 disp
= (int8_t)cpu_ldub_code(env
, s
->pc
++);
1911 disp
= (int16_t)cpu_lduw_code(env
, s
->pc
);
1917 tcg_gen_add_tl(cpu_A0
, cpu_regs
[R_EBX
], cpu_regs
[R_ESI
]);
1920 tcg_gen_add_tl(cpu_A0
, cpu_regs
[R_EBX
], cpu_regs
[R_EDI
]);
1923 tcg_gen_add_tl(cpu_A0
, cpu_regs
[R_EBP
], cpu_regs
[R_ESI
]);
1927 tcg_gen_add_tl(cpu_A0
, cpu_regs
[R_EBP
], cpu_regs
[R_EDI
]);
1931 sum
= cpu_regs
[R_ESI
];
1934 sum
= cpu_regs
[R_EDI
];
1937 sum
= cpu_regs
[R_EBP
];
1942 sum
= cpu_regs
[R_EBX
];
1946 tcg_gen_addi_tl(cpu_A0
, sum
, disp
);
1955 gen_lea_v_seg(s
, s
->aflag
, sum
, def_seg
, ovr_seg
);
1958 static void gen_nop_modrm(CPUX86State
*env
, DisasContext
*s
, int modrm
)
1960 int mod
, rm
, base
, code
;
1962 mod
= (modrm
>> 6) & 3;
1973 code
= cpu_ldub_code(env
, s
->pc
++);
2015 /* used for LEA and MOV AX, mem */
2016 static void gen_add_A0_ds_seg(DisasContext
*s
)
2018 gen_lea_v_seg(s
, s
->aflag
, cpu_A0
, R_DS
, s
->override
);
2021 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2023 static void gen_ldst_modrm(CPUX86State
*env
, DisasContext
*s
, int modrm
,
2024 TCGMemOp ot
, int reg
, int is_store
)
2028 mod
= (modrm
>> 6) & 3;
2029 rm
= (modrm
& 7) | REX_B(s
);
2033 gen_op_mov_v_reg(ot
, cpu_T0
, reg
);
2034 gen_op_mov_reg_v(ot
, rm
, cpu_T0
);
2036 gen_op_mov_v_reg(ot
, cpu_T0
, rm
);
2038 gen_op_mov_reg_v(ot
, reg
, cpu_T0
);
2041 gen_lea_modrm(env
, s
, modrm
);
2044 gen_op_mov_v_reg(ot
, cpu_T0
, reg
);
2045 gen_op_st_v(s
, ot
, cpu_T0
, cpu_A0
);
2047 gen_op_ld_v(s
, ot
, cpu_T0
, cpu_A0
);
2049 gen_op_mov_reg_v(ot
, reg
, cpu_T0
);
2054 static inline uint32_t insn_get(CPUX86State
*env
, DisasContext
*s
, TCGMemOp ot
)
2060 ret
= cpu_ldub_code(env
, s
->pc
);
2064 ret
= cpu_lduw_code(env
, s
->pc
);
2068 #ifdef TARGET_X86_64
2071 ret
= cpu_ldl_code(env
, s
->pc
);
2080 static inline int insn_const_size(TCGMemOp ot
)
2089 static inline void gen_goto_tb(DisasContext
*s
, int tb_num
, target_ulong eip
)
2091 TranslationBlock
*tb
;
2094 pc
= s
->cs_base
+ eip
;
2096 /* NOTE: we handle the case where the TB spans two pages here */
2097 if ((pc
& TARGET_PAGE_MASK
) == (tb
->pc
& TARGET_PAGE_MASK
) ||
2098 (pc
& TARGET_PAGE_MASK
) == ((s
->pc
- 1) & TARGET_PAGE_MASK
)) {
2099 /* jump to same page: we can use a direct jump */
2100 tcg_gen_goto_tb(tb_num
);
2102 tcg_gen_exit_tb((uintptr_t)tb
+ tb_num
);
2104 /* jump to another page: currently not optimized */
2110 static inline void gen_jcc(DisasContext
*s
, int b
,
2111 target_ulong val
, target_ulong next_eip
)
2116 l1
= gen_new_label();
2119 gen_goto_tb(s
, 0, next_eip
);
2122 gen_goto_tb(s
, 1, val
);
2123 s
->is_jmp
= DISAS_TB_JUMP
;
2125 l1
= gen_new_label();
2126 l2
= gen_new_label();
2129 gen_jmp_im(next_eip
);
2139 static void gen_cmovcc1(CPUX86State
*env
, DisasContext
*s
, TCGMemOp ot
, int b
,
2144 gen_ldst_modrm(env
, s
, modrm
, ot
, OR_TMP0
, 0);
2146 cc
= gen_prepare_cc(s
, b
, cpu_T1
);
2147 if (cc
.mask
!= -1) {
2148 TCGv t0
= tcg_temp_new();
2149 tcg_gen_andi_tl(t0
, cc
.reg
, cc
.mask
);
2153 cc
.reg2
= tcg_const_tl(cc
.imm
);
2156 tcg_gen_movcond_tl(cc
.cond
, cpu_T0
, cc
.reg
, cc
.reg2
,
2157 cpu_T0
, cpu_regs
[reg
]);
2158 gen_op_mov_reg_v(ot
, reg
, cpu_T0
);
2160 if (cc
.mask
!= -1) {
2161 tcg_temp_free(cc
.reg
);
2164 tcg_temp_free(cc
.reg2
);
2168 static inline void gen_op_movl_T0_seg(int seg_reg
)
2170 tcg_gen_ld32u_tl(cpu_T0
, cpu_env
,
2171 offsetof(CPUX86State
,segs
[seg_reg
].selector
));
2174 static inline void gen_op_movl_seg_T0_vm(int seg_reg
)
2176 tcg_gen_ext16u_tl(cpu_T0
, cpu_T0
);
2177 tcg_gen_st32_tl(cpu_T0
, cpu_env
,
2178 offsetof(CPUX86State
,segs
[seg_reg
].selector
));
2179 tcg_gen_shli_tl(cpu_seg_base
[seg_reg
], cpu_T0
, 4);
2182 /* move T0 to seg_reg and compute if the CPU state may change. Never
2183 call this function with seg_reg == R_CS */
2184 static void gen_movl_seg_T0(DisasContext
*s
, int seg_reg
)
2186 if (s
->pe
&& !s
->vm86
) {
2187 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
2188 gen_helper_load_seg(cpu_env
, tcg_const_i32(seg_reg
), cpu_tmp2_i32
);
2189 /* abort translation because the addseg value may change or
2190 because ss32 may change. For R_SS, translation must always
2191 stop as a special handling must be done to disable hardware
2192 interrupts for the next instruction */
2193 if (seg_reg
== R_SS
|| (s
->code32
&& seg_reg
< R_FS
))
2194 s
->is_jmp
= DISAS_TB_JUMP
;
2196 gen_op_movl_seg_T0_vm(seg_reg
);
2197 if (seg_reg
== R_SS
)
2198 s
->is_jmp
= DISAS_TB_JUMP
;
2202 static inline int svm_is_rep(int prefixes
)
2204 return ((prefixes
& (PREFIX_REPZ
| PREFIX_REPNZ
)) ?
8 : 0);
2208 gen_svm_check_intercept_param(DisasContext
*s
, target_ulong pc_start
,
2209 uint32_t type
, uint64_t param
)
2211 /* no SVM activated; fast case */
2212 if (likely(!(s
->flags
& HF_SVMI_MASK
)))
2214 gen_update_cc_op(s
);
2215 gen_jmp_im(pc_start
- s
->cs_base
);
2216 gen_helper_svm_check_intercept_param(cpu_env
, tcg_const_i32(type
),
2217 tcg_const_i64(param
));
2221 gen_svm_check_intercept(DisasContext
*s
, target_ulong pc_start
, uint64_t type
)
2223 gen_svm_check_intercept_param(s
, pc_start
, type
, 0);
2226 static inline void gen_stack_update(DisasContext
*s
, int addend
)
2228 gen_op_add_reg_im(mo_stacksize(s
), R_ESP
, addend
);
2231 /* Generate a push. It depends on ss32, addseg and dflag. */
2232 static void gen_push_v(DisasContext
*s
, TCGv val
)
2234 TCGMemOp d_ot
= mo_pushpop(s
, s
->dflag
);
2235 TCGMemOp a_ot
= mo_stacksize(s
);
2236 int size
= 1 << d_ot
;
2237 TCGv new_esp
= cpu_A0
;
2239 tcg_gen_subi_tl(cpu_A0
, cpu_regs
[R_ESP
], size
);
2244 tcg_gen_mov_tl(new_esp
, cpu_A0
);
2246 gen_lea_v_seg(s
, a_ot
, cpu_A0
, R_SS
, -1);
2249 gen_op_st_v(s
, d_ot
, val
, cpu_A0
);
2250 gen_op_mov_reg_v(a_ot
, R_ESP
, new_esp
);
2253 /* two step pop is necessary for precise exceptions */
2254 static TCGMemOp
gen_pop_T0(DisasContext
*s
)
2256 TCGMemOp d_ot
= mo_pushpop(s
, s
->dflag
);
2258 gen_lea_v_seg(s
, mo_stacksize(s
), cpu_regs
[R_ESP
], R_SS
, -1);
2259 gen_op_ld_v(s
, d_ot
, cpu_T0
, cpu_A0
);
2264 static inline void gen_pop_update(DisasContext
*s
, TCGMemOp ot
)
2266 gen_stack_update(s
, 1 << ot
);
2269 static inline void gen_stack_A0(DisasContext
*s
)
2271 gen_lea_v_seg(s
, s
->ss32 ? MO_32
: MO_16
, cpu_regs
[R_ESP
], R_SS
, -1);
2274 static void gen_pusha(DisasContext
*s
)
2276 TCGMemOp s_ot
= s
->ss32 ? MO_32
: MO_16
;
2277 TCGMemOp d_ot
= s
->dflag
;
2278 int size
= 1 << d_ot
;
2281 for (i
= 0; i
< 8; i
++) {
2282 tcg_gen_addi_tl(cpu_A0
, cpu_regs
[R_ESP
], (i
- 8) * size
);
2283 gen_lea_v_seg(s
, s_ot
, cpu_A0
, R_SS
, -1);
2284 gen_op_st_v(s
, d_ot
, cpu_regs
[7 - i
], cpu_A0
);
2287 gen_stack_update(s
, -8 * size
);
2290 static void gen_popa(DisasContext
*s
)
2292 TCGMemOp s_ot
= s
->ss32 ? MO_32
: MO_16
;
2293 TCGMemOp d_ot
= s
->dflag
;
2294 int size
= 1 << d_ot
;
2297 for (i
= 0; i
< 8; i
++) {
2298 /* ESP is not reloaded */
2299 if (7 - i
== R_ESP
) {
2302 tcg_gen_addi_tl(cpu_A0
, cpu_regs
[R_ESP
], i
* size
);
2303 gen_lea_v_seg(s
, s_ot
, cpu_A0
, R_SS
, -1);
2304 gen_op_ld_v(s
, d_ot
, cpu_T0
, cpu_A0
);
2305 gen_op_mov_reg_v(d_ot
, 7 - i
, cpu_T0
);
2308 gen_stack_update(s
, 8 * size
);
2311 static void gen_enter(DisasContext
*s
, int esp_addend
, int level
)
2313 TCGMemOp d_ot
= mo_pushpop(s
, s
->dflag
);
2314 TCGMemOp a_ot
= CODE64(s
) ? MO_64
: s
->ss32 ? MO_32
: MO_16
;
2315 int size
= 1 << d_ot
;
2317 /* Push BP; compute FrameTemp into T1. */
2318 tcg_gen_subi_tl(cpu_T1
, cpu_regs
[R_ESP
], size
);
2319 gen_lea_v_seg(s
, a_ot
, cpu_T1
, R_SS
, -1);
2320 gen_op_st_v(s
, d_ot
, cpu_regs
[R_EBP
], cpu_A0
);
2326 /* Copy level-1 pointers from the previous frame. */
2327 for (i
= 1; i
< level
; ++i
) {
2328 tcg_gen_subi_tl(cpu_A0
, cpu_regs
[R_EBP
], size
* i
);
2329 gen_lea_v_seg(s
, a_ot
, cpu_A0
, R_SS
, -1);
2330 gen_op_ld_v(s
, d_ot
, cpu_tmp0
, cpu_A0
);
2332 tcg_gen_subi_tl(cpu_A0
, cpu_T1
, size
* i
);
2333 gen_lea_v_seg(s
, a_ot
, cpu_A0
, R_SS
, -1);
2334 gen_op_st_v(s
, d_ot
, cpu_tmp0
, cpu_A0
);
2337 /* Push the current FrameTemp as the last level. */
2338 tcg_gen_subi_tl(cpu_A0
, cpu_T1
, size
* level
);
2339 gen_lea_v_seg(s
, a_ot
, cpu_A0
, R_SS
, -1);
2340 gen_op_st_v(s
, d_ot
, cpu_T1
, cpu_A0
);
2343 /* Copy the FrameTemp value to EBP. */
2344 gen_op_mov_reg_v(a_ot
, R_EBP
, cpu_T1
);
2346 /* Compute the final value of ESP. */
2347 tcg_gen_subi_tl(cpu_T1
, cpu_T1
, esp_addend
+ size
* level
);
2348 gen_op_mov_reg_v(a_ot
, R_ESP
, cpu_T1
);
2351 static void gen_leave(DisasContext
*s
)
2353 TCGMemOp d_ot
= mo_pushpop(s
, s
->dflag
);
2354 TCGMemOp a_ot
= mo_stacksize(s
);
2356 gen_lea_v_seg(s
, a_ot
, cpu_regs
[R_EBP
], R_SS
, -1);
2357 gen_op_ld_v(s
, d_ot
, cpu_T0
, cpu_A0
);
2359 tcg_gen_addi_tl(cpu_T1
, cpu_regs
[R_EBP
], 1 << d_ot
);
2361 gen_op_mov_reg_v(d_ot
, R_EBP
, cpu_T0
);
2362 gen_op_mov_reg_v(a_ot
, R_ESP
, cpu_T1
);
2365 static void gen_exception(DisasContext
*s
, int trapno
, target_ulong cur_eip
)
2367 gen_update_cc_op(s
);
2368 gen_jmp_im(cur_eip
);
2369 gen_helper_raise_exception(cpu_env
, tcg_const_i32(trapno
));
2370 s
->is_jmp
= DISAS_TB_JUMP
;
2373 /* an interrupt is different from an exception because of the
2375 static void gen_interrupt(DisasContext
*s
, int intno
,
2376 target_ulong cur_eip
, target_ulong next_eip
)
2378 gen_update_cc_op(s
);
2379 gen_jmp_im(cur_eip
);
2380 gen_helper_raise_interrupt(cpu_env
, tcg_const_i32(intno
),
2381 tcg_const_i32(next_eip
- cur_eip
));
2382 s
->is_jmp
= DISAS_TB_JUMP
;
2385 static void gen_debug(DisasContext
*s
, target_ulong cur_eip
)
2387 gen_update_cc_op(s
);
2388 gen_jmp_im(cur_eip
);
2389 gen_helper_debug(cpu_env
);
2390 s
->is_jmp
= DISAS_TB_JUMP
;
2393 /* generate a generic end of block. Trace exception is also generated
2395 static void gen_eob(DisasContext
*s
)
2397 gen_update_cc_op(s
);
2398 if (s
->tb
->flags
& HF_INHIBIT_IRQ_MASK
) {
2399 gen_helper_reset_inhibit_irq(cpu_env
);
2401 if (s
->tb
->flags
& HF_RF_MASK
) {
2402 gen_helper_reset_rf(cpu_env
);
2404 if (s
->singlestep_enabled
) {
2405 gen_helper_debug(cpu_env
);
2407 gen_helper_single_step(cpu_env
);
2411 s
->is_jmp
= DISAS_TB_JUMP
;
2414 /* generate a jump to eip. No segment change must happen before as a
2415 direct call to the next block may occur */
2416 static void gen_jmp_tb(DisasContext
*s
, target_ulong eip
, int tb_num
)
2418 gen_update_cc_op(s
);
2419 set_cc_op(s
, CC_OP_DYNAMIC
);
2421 gen_goto_tb(s
, tb_num
, eip
);
2422 s
->is_jmp
= DISAS_TB_JUMP
;
2429 static void gen_jmp(DisasContext
*s
, target_ulong eip
)
2431 gen_jmp_tb(s
, eip
, 0);
2434 static inline void gen_ldq_env_A0(DisasContext
*s
, int offset
)
2436 tcg_gen_qemu_ld_i64(cpu_tmp1_i64
, cpu_A0
, s
->mem_index
, MO_LEQ
);
2437 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, offset
);
2440 static inline void gen_stq_env_A0(DisasContext
*s
, int offset
)
2442 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, offset
);
2443 tcg_gen_qemu_st_i64(cpu_tmp1_i64
, cpu_A0
, s
->mem_index
, MO_LEQ
);
2446 static inline void gen_ldo_env_A0(DisasContext
*s
, int offset
)
2448 int mem_index
= s
->mem_index
;
2449 tcg_gen_qemu_ld_i64(cpu_tmp1_i64
, cpu_A0
, mem_index
, MO_LEQ
);
2450 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, offset
+ offsetof(ZMMReg
, ZMM_Q(0)));
2451 tcg_gen_addi_tl(cpu_tmp0
, cpu_A0
, 8);
2452 tcg_gen_qemu_ld_i64(cpu_tmp1_i64
, cpu_tmp0
, mem_index
, MO_LEQ
);
2453 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, offset
+ offsetof(ZMMReg
, ZMM_Q(1)));
2456 static inline void gen_sto_env_A0(DisasContext
*s
, int offset
)
2458 int mem_index
= s
->mem_index
;
2459 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, offset
+ offsetof(ZMMReg
, ZMM_Q(0)));
2460 tcg_gen_qemu_st_i64(cpu_tmp1_i64
, cpu_A0
, mem_index
, MO_LEQ
);
2461 tcg_gen_addi_tl(cpu_tmp0
, cpu_A0
, 8);
2462 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, offset
+ offsetof(ZMMReg
, ZMM_Q(1)));
2463 tcg_gen_qemu_st_i64(cpu_tmp1_i64
, cpu_tmp0
, mem_index
, MO_LEQ
);
2466 static inline void gen_op_movo(int d_offset
, int s_offset
)
2468 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, s_offset
+ offsetof(ZMMReg
, ZMM_Q(0)));
2469 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, d_offset
+ offsetof(ZMMReg
, ZMM_Q(0)));
2470 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, s_offset
+ offsetof(ZMMReg
, ZMM_Q(1)));
2471 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, d_offset
+ offsetof(ZMMReg
, ZMM_Q(1)));
2474 static inline void gen_op_movq(int d_offset
, int s_offset
)
2476 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
, s_offset
);
2477 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, d_offset
);
2480 static inline void gen_op_movl(int d_offset
, int s_offset
)
2482 tcg_gen_ld_i32(cpu_tmp2_i32
, cpu_env
, s_offset
);
2483 tcg_gen_st_i32(cpu_tmp2_i32
, cpu_env
, d_offset
);
2486 static inline void gen_op_movq_env_0(int d_offset
)
2488 tcg_gen_movi_i64(cpu_tmp1_i64
, 0);
2489 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
, d_offset
);
2492 typedef void (*SSEFunc_i_ep
)(TCGv_i32 val
, TCGv_ptr env
, TCGv_ptr reg
);
2493 typedef void (*SSEFunc_l_ep
)(TCGv_i64 val
, TCGv_ptr env
, TCGv_ptr reg
);
2494 typedef void (*SSEFunc_0_epi
)(TCGv_ptr env
, TCGv_ptr reg
, TCGv_i32 val
);
2495 typedef void (*SSEFunc_0_epl
)(TCGv_ptr env
, TCGv_ptr reg
, TCGv_i64 val
);
2496 typedef void (*SSEFunc_0_epp
)(TCGv_ptr env
, TCGv_ptr reg_a
, TCGv_ptr reg_b
);
2497 typedef void (*SSEFunc_0_eppi
)(TCGv_ptr env
, TCGv_ptr reg_a
, TCGv_ptr reg_b
,
2499 typedef void (*SSEFunc_0_ppi
)(TCGv_ptr reg_a
, TCGv_ptr reg_b
, TCGv_i32 val
);
2500 typedef void (*SSEFunc_0_eppt
)(TCGv_ptr env
, TCGv_ptr reg_a
, TCGv_ptr reg_b
,
2503 #define SSE_SPECIAL ((void *)1)
2504 #define SSE_DUMMY ((void *)2)
2506 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2507 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2508 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2510 static const SSEFunc_0_epp sse_op_table1
[256][4] = {
2511 /* 3DNow! extensions */
2512 [0x0e] = { SSE_DUMMY
}, /* femms */
2513 [0x0f] = { SSE_DUMMY
}, /* pf... */
2514 /* pure SSE operations */
2515 [0x10] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movups, movupd, movss, movsd */
2516 [0x11] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movups, movupd, movss, movsd */
2517 [0x12] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movlps, movlpd, movsldup, movddup */
2518 [0x13] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movlps, movlpd */
2519 [0x14] = { gen_helper_punpckldq_xmm
, gen_helper_punpcklqdq_xmm
},
2520 [0x15] = { gen_helper_punpckhdq_xmm
, gen_helper_punpckhqdq_xmm
},
2521 [0x16] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movhps, movhpd, movshdup */
2522 [0x17] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movhps, movhpd */
2524 [0x28] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movaps, movapd */
2525 [0x29] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movaps, movapd */
2526 [0x2a] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2527 [0x2b] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movntps, movntpd, movntss, movntsd */
2528 [0x2c] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2529 [0x2d] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2530 [0x2e] = { gen_helper_ucomiss
, gen_helper_ucomisd
},
2531 [0x2f] = { gen_helper_comiss
, gen_helper_comisd
},
2532 [0x50] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movmskps, movmskpd */
2533 [0x51] = SSE_FOP(sqrt
),
2534 [0x52] = { gen_helper_rsqrtps
, NULL
, gen_helper_rsqrtss
, NULL
},
2535 [0x53] = { gen_helper_rcpps
, NULL
, gen_helper_rcpss
, NULL
},
2536 [0x54] = { gen_helper_pand_xmm
, gen_helper_pand_xmm
}, /* andps, andpd */
2537 [0x55] = { gen_helper_pandn_xmm
, gen_helper_pandn_xmm
}, /* andnps, andnpd */
2538 [0x56] = { gen_helper_por_xmm
, gen_helper_por_xmm
}, /* orps, orpd */
2539 [0x57] = { gen_helper_pxor_xmm
, gen_helper_pxor_xmm
}, /* xorps, xorpd */
2540 [0x58] = SSE_FOP(add
),
2541 [0x59] = SSE_FOP(mul
),
2542 [0x5a] = { gen_helper_cvtps2pd
, gen_helper_cvtpd2ps
,
2543 gen_helper_cvtss2sd
, gen_helper_cvtsd2ss
},
2544 [0x5b] = { gen_helper_cvtdq2ps
, gen_helper_cvtps2dq
, gen_helper_cvttps2dq
},
2545 [0x5c] = SSE_FOP(sub
),
2546 [0x5d] = SSE_FOP(min
),
2547 [0x5e] = SSE_FOP(div
),
2548 [0x5f] = SSE_FOP(max
),
2550 [0xc2] = SSE_FOP(cmpeq
),
2551 [0xc6] = { (SSEFunc_0_epp
)gen_helper_shufps
,
2552 (SSEFunc_0_epp
)gen_helper_shufpd
}, /* XXX: casts */
2554 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2555 [0x38] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
},
2556 [0x3a] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
},
2558 /* MMX ops and their SSE extensions */
2559 [0x60] = MMX_OP2(punpcklbw
),
2560 [0x61] = MMX_OP2(punpcklwd
),
2561 [0x62] = MMX_OP2(punpckldq
),
2562 [0x63] = MMX_OP2(packsswb
),
2563 [0x64] = MMX_OP2(pcmpgtb
),
2564 [0x65] = MMX_OP2(pcmpgtw
),
2565 [0x66] = MMX_OP2(pcmpgtl
),
2566 [0x67] = MMX_OP2(packuswb
),
2567 [0x68] = MMX_OP2(punpckhbw
),
2568 [0x69] = MMX_OP2(punpckhwd
),
2569 [0x6a] = MMX_OP2(punpckhdq
),
2570 [0x6b] = MMX_OP2(packssdw
),
2571 [0x6c] = { NULL
, gen_helper_punpcklqdq_xmm
},
2572 [0x6d] = { NULL
, gen_helper_punpckhqdq_xmm
},
2573 [0x6e] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movd mm, ea */
2574 [0x6f] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movq, movdqa, movdqu */
2575 [0x70] = { (SSEFunc_0_epp
)gen_helper_pshufw_mmx
,
2576 (SSEFunc_0_epp
)gen_helper_pshufd_xmm
,
2577 (SSEFunc_0_epp
)gen_helper_pshufhw_xmm
,
2578 (SSEFunc_0_epp
)gen_helper_pshuflw_xmm
}, /* XXX: casts */
2579 [0x71] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* shiftw */
2580 [0x72] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* shiftd */
2581 [0x73] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* shiftq */
2582 [0x74] = MMX_OP2(pcmpeqb
),
2583 [0x75] = MMX_OP2(pcmpeqw
),
2584 [0x76] = MMX_OP2(pcmpeql
),
2585 [0x77] = { SSE_DUMMY
}, /* emms */
2586 [0x78] = { NULL
, SSE_SPECIAL
, NULL
, SSE_SPECIAL
}, /* extrq_i, insertq_i */
2587 [0x79] = { NULL
, gen_helper_extrq_r
, NULL
, gen_helper_insertq_r
},
2588 [0x7c] = { NULL
, gen_helper_haddpd
, NULL
, gen_helper_haddps
},
2589 [0x7d] = { NULL
, gen_helper_hsubpd
, NULL
, gen_helper_hsubps
},
2590 [0x7e] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movd, movd, movq */
2591 [0x7f] = { SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
}, /* movq, movdqa, movdqu */
2592 [0xc4] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* pinsrw */
2593 [0xc5] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* pextrw */
2594 [0xd0] = { NULL
, gen_helper_addsubpd
, NULL
, gen_helper_addsubps
},
2595 [0xd1] = MMX_OP2(psrlw
),
2596 [0xd2] = MMX_OP2(psrld
),
2597 [0xd3] = MMX_OP2(psrlq
),
2598 [0xd4] = MMX_OP2(paddq
),
2599 [0xd5] = MMX_OP2(pmullw
),
2600 [0xd6] = { NULL
, SSE_SPECIAL
, SSE_SPECIAL
, SSE_SPECIAL
},
2601 [0xd7] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* pmovmskb */
2602 [0xd8] = MMX_OP2(psubusb
),
2603 [0xd9] = MMX_OP2(psubusw
),
2604 [0xda] = MMX_OP2(pminub
),
2605 [0xdb] = MMX_OP2(pand
),
2606 [0xdc] = MMX_OP2(paddusb
),
2607 [0xdd] = MMX_OP2(paddusw
),
2608 [0xde] = MMX_OP2(pmaxub
),
2609 [0xdf] = MMX_OP2(pandn
),
2610 [0xe0] = MMX_OP2(pavgb
),
2611 [0xe1] = MMX_OP2(psraw
),
2612 [0xe2] = MMX_OP2(psrad
),
2613 [0xe3] = MMX_OP2(pavgw
),
2614 [0xe4] = MMX_OP2(pmulhuw
),
2615 [0xe5] = MMX_OP2(pmulhw
),
2616 [0xe6] = { NULL
, gen_helper_cvttpd2dq
, gen_helper_cvtdq2pd
, gen_helper_cvtpd2dq
},
2617 [0xe7] = { SSE_SPECIAL
, SSE_SPECIAL
}, /* movntq, movntdq */
2618 [0xe8] = MMX_OP2(psubsb
),
2619 [0xe9] = MMX_OP2(psubsw
),
2620 [0xea] = MMX_OP2(pminsw
),
2621 [0xeb] = MMX_OP2(por
),
2622 [0xec] = MMX_OP2(paddsb
),
2623 [0xed] = MMX_OP2(paddsw
),
2624 [0xee] = MMX_OP2(pmaxsw
),
2625 [0xef] = MMX_OP2(pxor
),
2626 [0xf0] = { NULL
, NULL
, NULL
, SSE_SPECIAL
}, /* lddqu */
2627 [0xf1] = MMX_OP2(psllw
),
2628 [0xf2] = MMX_OP2(pslld
),
2629 [0xf3] = MMX_OP2(psllq
),
2630 [0xf4] = MMX_OP2(pmuludq
),
2631 [0xf5] = MMX_OP2(pmaddwd
),
2632 [0xf6] = MMX_OP2(psadbw
),
2633 [0xf7] = { (SSEFunc_0_epp
)gen_helper_maskmov_mmx
,
2634 (SSEFunc_0_epp
)gen_helper_maskmov_xmm
}, /* XXX: casts */
2635 [0xf8] = MMX_OP2(psubb
),
2636 [0xf9] = MMX_OP2(psubw
),
2637 [0xfa] = MMX_OP2(psubl
),
2638 [0xfb] = MMX_OP2(psubq
),
2639 [0xfc] = MMX_OP2(paddb
),
2640 [0xfd] = MMX_OP2(paddw
),
2641 [0xfe] = MMX_OP2(paddl
),
2644 static const SSEFunc_0_epp sse_op_table2
[3 * 8][2] = {
2645 [0 + 2] = MMX_OP2(psrlw
),
2646 [0 + 4] = MMX_OP2(psraw
),
2647 [0 + 6] = MMX_OP2(psllw
),
2648 [8 + 2] = MMX_OP2(psrld
),
2649 [8 + 4] = MMX_OP2(psrad
),
2650 [8 + 6] = MMX_OP2(pslld
),
2651 [16 + 2] = MMX_OP2(psrlq
),
2652 [16 + 3] = { NULL
, gen_helper_psrldq_xmm
},
2653 [16 + 6] = MMX_OP2(psllq
),
2654 [16 + 7] = { NULL
, gen_helper_pslldq_xmm
},
2657 static const SSEFunc_0_epi sse_op_table3ai
[] = {
2658 gen_helper_cvtsi2ss
,
2662 #ifdef TARGET_X86_64
2663 static const SSEFunc_0_epl sse_op_table3aq
[] = {
2664 gen_helper_cvtsq2ss
,
2669 static const SSEFunc_i_ep sse_op_table3bi
[] = {
2670 gen_helper_cvttss2si
,
2671 gen_helper_cvtss2si
,
2672 gen_helper_cvttsd2si
,
2676 #ifdef TARGET_X86_64
2677 static const SSEFunc_l_ep sse_op_table3bq
[] = {
2678 gen_helper_cvttss2sq
,
2679 gen_helper_cvtss2sq
,
2680 gen_helper_cvttsd2sq
,
2685 static const SSEFunc_0_epp sse_op_table4
[8][4] = {
2696 static const SSEFunc_0_epp sse_op_table5
[256] = {
2697 [0x0c] = gen_helper_pi2fw
,
2698 [0x0d] = gen_helper_pi2fd
,
2699 [0x1c] = gen_helper_pf2iw
,
2700 [0x1d] = gen_helper_pf2id
,
2701 [0x8a] = gen_helper_pfnacc
,
2702 [0x8e] = gen_helper_pfpnacc
,
2703 [0x90] = gen_helper_pfcmpge
,
2704 [0x94] = gen_helper_pfmin
,
2705 [0x96] = gen_helper_pfrcp
,
2706 [0x97] = gen_helper_pfrsqrt
,
2707 [0x9a] = gen_helper_pfsub
,
2708 [0x9e] = gen_helper_pfadd
,
2709 [0xa0] = gen_helper_pfcmpgt
,
2710 [0xa4] = gen_helper_pfmax
,
2711 [0xa6] = gen_helper_movq
, /* pfrcpit1; no need to actually increase precision */
2712 [0xa7] = gen_helper_movq
, /* pfrsqit1 */
2713 [0xaa] = gen_helper_pfsubr
,
2714 [0xae] = gen_helper_pfacc
,
2715 [0xb0] = gen_helper_pfcmpeq
,
2716 [0xb4] = gen_helper_pfmul
,
2717 [0xb6] = gen_helper_movq
, /* pfrcpit2 */
2718 [0xb7] = gen_helper_pmulhrw_mmx
,
2719 [0xbb] = gen_helper_pswapd
,
2720 [0xbf] = gen_helper_pavgb_mmx
/* pavgusb */
2723 struct SSEOpHelper_epp
{
2724 SSEFunc_0_epp op
[2];
2728 struct SSEOpHelper_eppi
{
2729 SSEFunc_0_eppi op
[2];
/* Initializer shorthands for sse_op_table6 / sse_op_table7 entries.  Each
   expands to { <op pair>, <CPUID_EXT_* flag> }:
     SSSE3_OP(x)      op pair from MMX_OP2(x), flag CPUID_EXT_SSSE3
     SSE41_OP(x)      { NULL, gen_helper_<x>_xmm }, flag CPUID_EXT_SSE41
     SSE42_OP(x)      { NULL, gen_helper_<x>_xmm }, flag CPUID_EXT_SSE42
     SSE41_SPECIAL    { NULL, SSE_SPECIAL }, flag CPUID_EXT_SSE41
     PCLMULQDQ_OP(x)  { NULL, gen_helper_<x>_xmm }, flag CPUID_EXT_PCLMULQDQ
     AESNI_OP(x)      { NULL, gen_helper_<x>_xmm }, flag CPUID_EXT_AES
   NOTE(review): the NULL in slot 0 presumably means "no MMX form of this
   instruction" -- confirm against the code that reads op[]. */
2733 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2734 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2735 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2736 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2737 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2738 CPUID_EXT_PCLMULQDQ }
2739 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
/* 256-entry, sparsely populated table of two-operand helpers.  Each
   populated slot pairs the helper(s) with the CPUID_EXT_* flag named by
   its initializer macro (SSSE3, SSE4.1, SSE4.2 or AES); slots without a
   designated initializer are zero-initialized.  Slot 0x2a holds
   SSE_SPECIAL instead of a helper.
   NOTE(review): presumably indexed by the opcode byte that follows the
   0F 38 escape -- confirm against the lookup in gen_sse. */
2741 static const struct SSEOpHelper_epp sse_op_table6
[256] = {
2742 [0x00] = SSSE3_OP(pshufb
),
2743 [0x01] = SSSE3_OP(phaddw
),
2744 [0x02] = SSSE3_OP(phaddd
),
2745 [0x03] = SSSE3_OP(phaddsw
),
2746 [0x04] = SSSE3_OP(pmaddubsw
),
2747 [0x05] = SSSE3_OP(phsubw
),
2748 [0x06] = SSSE3_OP(phsubd
),
2749 [0x07] = SSSE3_OP(phsubsw
),
2750 [0x08] = SSSE3_OP(psignb
),
2751 [0x09] = SSSE3_OP(psignw
),
2752 [0x0a] = SSSE3_OP(psignd
),
2753 [0x0b] = SSSE3_OP(pmulhrsw
),
2754 [0x10] = SSE41_OP(pblendvb
),
2755 [0x14] = SSE41_OP(blendvps
),
2756 [0x15] = SSE41_OP(blendvpd
),
2757 [0x17] = SSE41_OP(ptest
),
2758 [0x1c] = SSSE3_OP(pabsb
),
2759 [0x1d] = SSSE3_OP(pabsw
),
2760 [0x1e] = SSSE3_OP(pabsd
),
2761 [0x20] = SSE41_OP(pmovsxbw
),
2762 [0x21] = SSE41_OP(pmovsxbd
),
2763 [0x22] = SSE41_OP(pmovsxbq
),
2764 [0x23] = SSE41_OP(pmovsxwd
),
2765 [0x24] = SSE41_OP(pmovsxwq
),
2766 [0x25] = SSE41_OP(pmovsxdq
),
2767 [0x28] = SSE41_OP(pmuldq
),
2768 [0x29] = SSE41_OP(pcmpeqq
),
2769 [0x2a] = SSE41_SPECIAL
, /* movntdqa */
2770 [0x2b] = SSE41_OP(packusdw
),
2771 [0x30] = SSE41_OP(pmovzxbw
),
2772 [0x31] = SSE41_OP(pmovzxbd
),
2773 [0x32] = SSE41_OP(pmovzxbq
),
2774 [0x33] = SSE41_OP(pmovzxwd
),
2775 [0x34] = SSE41_OP(pmovzxwq
),
2776 [0x35] = SSE41_OP(pmovzxdq
),
2777 [0x37] = SSE42_OP(pcmpgtq
),
2778 [0x38] = SSE41_OP(pminsb
),
2779 [0x39] = SSE41_OP(pminsd
),
2780 [0x3a] = SSE41_OP(pminuw
),
2781 [0x3b] = SSE41_OP(pminud
),
2782 [0x3c] = SSE41_OP(pmaxsb
),
2783 [0x3d] = SSE41_OP(pmaxsd
),
2784 [0x3e] = SSE41_OP(pmaxuw
),
2785 [0x3f] = SSE41_OP(pmaxud
),
2786 [0x40] = SSE41_OP(pmulld
),
2787 [0x41] = SSE41_OP(phminposuw
),
2788 [0xdb] = AESNI_OP(aesimc
),
2789 [0xdc] = AESNI_OP(aesenc
),
2790 [0xdd] = AESNI_OP(aesenclast
),
2791 [0xde] = AESNI_OP(aesdec
),
2792 [0xdf] = AESNI_OP(aesdeclast
),
/* 256-entry, sparsely populated table of SSEFunc_0_eppi helpers.  Each
   populated slot pairs the helper(s) with the CPUID_EXT_* flag named by
   its initializer macro; slots without a designated initializer are
   zero-initialized.  Slot 0x0f (palignr) is the only one built with
   SSSE3_OP, i.e. through MMX_OP2; every other helper slot has NULL in
   op[0].  The SSE41_SPECIAL slots (pextr*, extractps, pinsr*, insertps)
   hold SSE_SPECIAL instead of a helper.
   NOTE(review): presumably indexed by the opcode byte that follows the
   0F 3A escape -- confirm against the lookup in gen_sse. */
2795 static const struct SSEOpHelper_eppi sse_op_table7
[256] = {
2796 [0x08] = SSE41_OP(roundps
),
2797 [0x09] = SSE41_OP(roundpd
),
2798 [0x0a] = SSE41_OP(roundss
),
2799 [0x0b] = SSE41_OP(roundsd
),
2800 [0x0c] = SSE41_OP(blendps
),
2801 [0x0d] = SSE41_OP(blendpd
),
2802 [0x0e] = SSE41_OP(pblendw
),
2803 [0x0f] = SSSE3_OP(palignr
),
2804 [0x14] = SSE41_SPECIAL
, /* pextrb */
2805 [0x15] = SSE41_SPECIAL
, /* pextrw */
2806 [0x16] = SSE41_SPECIAL
, /* pextrd/pextrq */
2807 [0x17] = SSE41_SPECIAL
, /* extractps */
2808 [0x20] = SSE41_SPECIAL
, /* pinsrb */
2809 [0x21] = SSE41_SPECIAL
, /* insertps */
2810 [0x22] = SSE41_SPECIAL
, /* pinsrd/pinsrq */
2811 [0x40] = SSE41_OP(dpps
),
2812 [0x41] = SSE41_OP(dppd
),
2813 [0x42] = SSE41_OP(mpsadbw
),
2814 [0x44] = PCLMULQDQ_OP(pclmulqdq
),
2815 [0x60] = SSE42_OP(pcmpestrm
),
2816 [0x61] = SSE42_OP(pcmpestri
),
2817 [0x62] = SSE42_OP(pcmpistrm
),
2818 [0x63] = SSE42_OP(pcmpistri
),
2819 [0xdf] = AESNI_OP(aeskeygenassist
),
2822 static void gen_sse(CPUX86State
*env
, DisasContext
*s
, int b
,
2823 target_ulong pc_start
, int rex_r
)
2825 int b1
, op1_offset
, op2_offset
, is_xmm
, val
;
2826 int modrm
, mod
, rm
, reg
;
2827 SSEFunc_0_epp sse_fn_epp
;
2828 SSEFunc_0_eppi sse_fn_eppi
;
2829 SSEFunc_0_ppi sse_fn_ppi
;
2830 SSEFunc_0_eppt sse_fn_eppt
;
2834 if (s
->prefix
& PREFIX_DATA
)
2836 else if (s
->prefix
& PREFIX_REPZ
)
2838 else if (s
->prefix
& PREFIX_REPNZ
)
2842 sse_fn_epp
= sse_op_table1
[b
][b1
];
2846 if ((b
<= 0x5f && b
>= 0x10) || b
== 0xc6 || b
== 0xc2) {
2856 /* simple MMX/SSE operation */
2857 if (s
->flags
& HF_TS_MASK
) {
2858 gen_exception(s
, EXCP07_PREX
, pc_start
- s
->cs_base
);
2861 if (s
->flags
& HF_EM_MASK
) {
2863 gen_exception(s
, EXCP06_ILLOP
, pc_start
- s
->cs_base
);
2866 if (is_xmm
&& !(s
->flags
& HF_OSFXSR_MASK
))
2867 if ((b
!= 0x38 && b
!= 0x3a) || (s
->prefix
& PREFIX_DATA
))
2870 if (!(s
->cpuid_ext2_features
& CPUID_EXT2_3DNOW
))
2873 gen_helper_emms(cpu_env
);
2878 gen_helper_emms(cpu_env
);
2881 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2882 the static cpu state) */
2884 gen_helper_enter_mmx(cpu_env
);
2887 modrm
= cpu_ldub_code(env
, s
->pc
++);
2888 reg
= ((modrm
>> 3) & 7);
2891 mod
= (modrm
>> 6) & 3;
2892 if (sse_fn_epp
== SSE_SPECIAL
) {
2895 case 0x0e7: /* movntq */
2898 gen_lea_modrm(env
, s
, modrm
);
2899 gen_stq_env_A0(s
, offsetof(CPUX86State
, fpregs
[reg
].mmx
));
2901 case 0x1e7: /* movntdq */
2902 case 0x02b: /* movntps */
2903 case 0x12b: /* movntps */
2906 gen_lea_modrm(env
, s
, modrm
);
2907 gen_sto_env_A0(s
, offsetof(CPUX86State
, xmm_regs
[reg
]));
2909 case 0x3f0: /* lddqu */
2912 gen_lea_modrm(env
, s
, modrm
);
2913 gen_ldo_env_A0(s
, offsetof(CPUX86State
, xmm_regs
[reg
]));
2915 case 0x22b: /* movntss */
2916 case 0x32b: /* movntsd */
2919 gen_lea_modrm(env
, s
, modrm
);
2921 gen_stq_env_A0(s
, offsetof(CPUX86State
,
2922 xmm_regs
[reg
].ZMM_Q(0)));
2924 tcg_gen_ld32u_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,
2925 xmm_regs
[reg
].ZMM_L(0)));
2926 gen_op_st_v(s
, MO_32
, cpu_T0
, cpu_A0
);
2929 case 0x6e: /* movd mm, ea */
2930 #ifdef TARGET_X86_64
2931 if (s
->dflag
== MO_64
) {
2932 gen_ldst_modrm(env
, s
, modrm
, MO_64
, OR_TMP0
, 0);
2933 tcg_gen_st_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,fpregs
[reg
].mmx
));
2937 gen_ldst_modrm(env
, s
, modrm
, MO_32
, OR_TMP0
, 0);
2938 tcg_gen_addi_ptr(cpu_ptr0
, cpu_env
,
2939 offsetof(CPUX86State
,fpregs
[reg
].mmx
));
2940 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
2941 gen_helper_movl_mm_T0_mmx(cpu_ptr0
, cpu_tmp2_i32
);
2944 case 0x16e: /* movd xmm, ea */
2945 #ifdef TARGET_X86_64
2946 if (s
->dflag
== MO_64
) {
2947 gen_ldst_modrm(env
, s
, modrm
, MO_64
, OR_TMP0
, 0);
2948 tcg_gen_addi_ptr(cpu_ptr0
, cpu_env
,
2949 offsetof(CPUX86State
,xmm_regs
[reg
]));
2950 gen_helper_movq_mm_T0_xmm(cpu_ptr0
, cpu_T0
);
2954 gen_ldst_modrm(env
, s
, modrm
, MO_32
, OR_TMP0
, 0);
2955 tcg_gen_addi_ptr(cpu_ptr0
, cpu_env
,
2956 offsetof(CPUX86State
,xmm_regs
[reg
]));
2957 tcg_gen_trunc_tl_i32(cpu_tmp2_i32
, cpu_T0
);
2958 gen_helper_movl_mm_T0_xmm(cpu_ptr0
, cpu_tmp2_i32
);
2961 case 0x6f: /* movq mm, ea */
2963 gen_lea_modrm(env
, s
, modrm
);
2964 gen_ldq_env_A0(s
, offsetof(CPUX86State
, fpregs
[reg
].mmx
));
2967 tcg_gen_ld_i64(cpu_tmp1_i64
, cpu_env
,
2968 offsetof(CPUX86State
,fpregs
[rm
].mmx
));
2969 tcg_gen_st_i64(cpu_tmp1_i64
, cpu_env
,
2970 offsetof(CPUX86State
,fpregs
[reg
].mmx
));
2973 case 0x010: /* movups */
2974 case 0x110: /* movupd */
2975 case 0x028: /* movaps */
2976 case 0x128: /* movapd */
2977 case 0x16f: /* movdqa xmm, ea */
2978 case 0x26f: /* movdqu xmm, ea */
2980 gen_lea_modrm(env
, s
, modrm
);
2981 gen_ldo_env_A0(s
, offsetof(CPUX86State
, xmm_regs
[reg
]));
2983 rm
= (modrm
& 7) | REX_B(s
);
2984 gen_op_movo(offsetof(CPUX86State
,xmm_regs
[reg
]),
2985 offsetof(CPUX86State
,xmm_regs
[rm
]));
2988 case 0x210: /* movss xmm, ea */
2990 gen_lea_modrm(env
, s
, modrm
);
2991 gen_op_ld_v(s
, MO_32
, cpu_T0
, cpu_A0
);
2992 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)));
2993 tcg_gen_movi_tl(cpu_T0
, 0);
2994 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(1)));
2995 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(2)));
2996 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(3)));
2998 rm
= (modrm
& 7) | REX_B(s
);
2999 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)),
3000 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_L(0)));
3003 case 0x310: /* movsd xmm, ea */
3005 gen_lea_modrm(env
, s
, modrm
);
3006 gen_ldq_env_A0(s
, offsetof(CPUX86State
,
3007 xmm_regs
[reg
].ZMM_Q(0)));
3008 tcg_gen_movi_tl(cpu_T0
, 0);
3009 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(2)));
3010 tcg_gen_st32_tl(cpu_T0
, cpu_env
, offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(3)));
3012 rm
= (modrm
& 7) | REX_B(s
);
3013 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)),
3014 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_Q(0)));
3017 case 0x012: /* movlps */
3018 case 0x112: /* movlpd */
3020 gen_lea_modrm(env
, s
, modrm
);
3021 gen_ldq_env_A0(s
, offsetof(CPUX86State
,
3022 xmm_regs
[reg
].ZMM_Q(0)));
3025 rm
= (modrm
& 7) | REX_B(s
);
3026 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)),
3027 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_Q(1)));
3030 case 0x212: /* movsldup */
3032 gen_lea_modrm(env
, s
, modrm
);
3033 gen_ldo_env_A0(s
, offsetof(CPUX86State
, xmm_regs
[reg
]));
3035 rm
= (modrm
& 7) | REX_B(s
);
3036 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)),
3037 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_L(0)));
3038 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(2)),
3039 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_L(2)));
3041 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(1)),
3042 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)));
3043 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(3)),
3044 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(2)));
3046 case 0x312: /* movddup */
3048 gen_lea_modrm(env
, s
, modrm
);
3049 gen_ldq_env_A0(s
, offsetof(CPUX86State
,
3050 xmm_regs
[reg
].ZMM_Q(0)));
3052 rm
= (modrm
& 7) | REX_B(s
);
3053 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)),
3054 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_Q(0)));
3056 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(1)),
3057 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)));
3059 case 0x016: /* movhps */
3060 case 0x116: /* movhpd */
3062 gen_lea_modrm(env
, s
, modrm
);
3063 gen_ldq_env_A0(s
, offsetof(CPUX86State
,
3064 xmm_regs
[reg
].ZMM_Q(1)));
3067 rm
= (modrm
& 7) | REX_B(s
);
3068 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(1)),
3069 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_Q(0)));
3072 case 0x216: /* movshdup */
3074 gen_lea_modrm(env
, s
, modrm
);
3075 gen_ldo_env_A0(s
, offsetof(CPUX86State
, xmm_regs
[reg
]));
3077 rm
= (modrm
& 7) | REX_B(s
);
3078 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(1)),
3079 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_L(1)));
3080 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(3)),
3081 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_L(3)));
3083 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)),
3084 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(1)));
3085 gen_op_movl(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(2)),
3086 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(3)));
3091 int bit_index
, field_length
;
3093 if (b1
== 1 && reg
!= 0)
3095 field_length
= cpu_ldub_code(env
, s
->pc
++) & 0x3F;
3096 bit_index
= cpu_ldub_code(env
, s
->pc
++) & 0x3F;
3097 tcg_gen_addi_ptr(cpu_ptr0
, cpu_env
,
3098 offsetof(CPUX86State
,xmm_regs
[reg
]));
3100 gen_helper_extrq_i(cpu_env
, cpu_ptr0
,
3101 tcg_const_i32(bit_index
),
3102 tcg_const_i32(field_length
));
3104 gen_helper_insertq_i(cpu_env
, cpu_ptr0
,
3105 tcg_const_i32(bit_index
),
3106 tcg_const_i32(field_length
));
3109 case 0x7e: /* movd ea, mm */
3110 #ifdef TARGET_X86_64
3111 if (s
->dflag
== MO_64
) {
3112 tcg_gen_ld_i64(cpu_T0
, cpu_env
,
3113 offsetof(CPUX86State
,fpregs
[reg
].mmx
));
3114 gen_ldst_modrm(env
, s
, modrm
, MO_64
, OR_TMP0
, 1);
3118 tcg_gen_ld32u_tl(cpu_T0
, cpu_env
,
3119 offsetof(CPUX86State
,fpregs
[reg
].mmx
.MMX_L(0)));
3120 gen_ldst_modrm(env
, s
, modrm
, MO_32
, OR_TMP0
, 1);
3123 case 0x17e: /* movd ea, xmm */
3124 #ifdef TARGET_X86_64
3125 if (s
->dflag
== MO_64
) {
3126 tcg_gen_ld_i64(cpu_T0
, cpu_env
,
3127 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)));
3128 gen_ldst_modrm(env
, s
, modrm
, MO_64
, OR_TMP0
, 1);
3132 tcg_gen_ld32u_tl(cpu_T0
, cpu_env
,
3133 offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_L(0)));
3134 gen_ldst_modrm(env
, s
, modrm
, MO_32
, OR_TMP0
, 1);
3137 case 0x27e: /* movq xmm, ea */
3139 gen_lea_modrm(env
, s
, modrm
);
3140 gen_ldq_env_A0(s
, offsetof(CPUX86State
,
3141 xmm_regs
[reg
].ZMM_Q(0)));
3143 rm
= (modrm
& 7) | REX_B(s
);
3144 gen_op_movq(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(0)),
3145 offsetof(CPUX86State
,xmm_regs
[rm
].ZMM_Q(0)));
3147 gen_op_movq_env_0(offsetof(CPUX86State
,xmm_regs
[reg
].ZMM_Q(1)));
3149 case 0x7f: /* movq ea, mm */
3151 gen_lea_modrm(env
, s
, modrm
);
3152 gen_stq_env_A0(s
, offsetof(CPUX86State
, fpregs
[reg
].mmx
));
3155 gen_op_movq(offsetof(CPUX86State
,fpregs
[rm
].mmx
),