target-i386: Rearrange processing of 0F AE
[qemu.git] / target-i386 / translate.c
1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "tcg-op.h"
25 #include "exec/cpu_ldst.h"
26
27 #include "exec/helper-proto.h"
28 #include "exec/helper-gen.h"
29
30 #include "trace-tcg.h"
31 #include "exec/log.h"
32
33
34 #define PREFIX_REPZ 0x01
35 #define PREFIX_REPNZ 0x02
36 #define PREFIX_LOCK 0x04
37 #define PREFIX_DATA 0x08
38 #define PREFIX_ADR 0x10
39 #define PREFIX_VEX 0x20
40
41 #ifdef TARGET_X86_64
42 #define CODE64(s) ((s)->code64)
43 #define REX_X(s) ((s)->rex_x)
44 #define REX_B(s) ((s)->rex_b)
45 #else
46 #define CODE64(s) 0
47 #define REX_X(s) 0
48 #define REX_B(s) 0
49 #endif
50
51 #ifdef TARGET_X86_64
52 # define ctztl ctz64
53 # define clztl clz64
54 #else
55 # define ctztl ctz32
56 # define clztl clz32
57 #endif
58
59 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
60 #define CASE_MEM_OP(OP) \
61 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
62 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
63 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
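/* Worked example (added note, not in the original source): CASE_MEM_OP(3)
   matches the modrm values 0x18..0x1f, 0x58..0x5f and 0x98..0x9f, i.e.
   mod = 0, 1 or 2 with reg = 3 and any rm -- every encoding of operation 3
   that takes a memory operand. */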
64
65 //#define MACRO_TEST 1
66
67 /* global register indexes */
68 static TCGv_ptr cpu_env;
69 static TCGv cpu_A0;
70 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
71 static TCGv_i32 cpu_cc_op;
72 static TCGv cpu_regs[CPU_NB_REGS];
73 static TCGv cpu_seg_base[6];
74 /* local temps */
75 static TCGv cpu_T0, cpu_T1;
76 /* local register indexes (only used inside old micro ops) */
77 static TCGv cpu_tmp0, cpu_tmp4;
78 static TCGv_ptr cpu_ptr0, cpu_ptr1;
79 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
80 static TCGv_i64 cpu_tmp1_i64;
81
82 #include "exec/gen-icount.h"
83
84 #ifdef TARGET_X86_64
85 static int x86_64_hregs;
86 #endif
87
88 typedef struct DisasContext {
89 /* current insn context */
90 int override; /* -1 if no override */
91 int prefix;
92 TCGMemOp aflag;
93 TCGMemOp dflag;
94 target_ulong pc; /* pc = eip + cs_base */
95 int is_jmp; /* 1 means jump (stop translation), 2 means CPU
96 static state change (stop translation) */
97 /* current block context */
98 target_ulong cs_base; /* base of CS segment */
99 int pe; /* protected mode */
100 int code32; /* 32 bit code segment */
101 #ifdef TARGET_X86_64
102 int lma; /* long mode active */
103 int code64; /* 64 bit code segment */
104 int rex_x, rex_b;
105 #endif
106 int vex_l; /* vex vector length */
107 int vex_v; /* vex vvvv register, without 1's complement. */
108 int ss32; /* 32 bit stack segment */
109 CCOp cc_op; /* current CC operation */
110 bool cc_op_dirty;
111 int addseg; /* non-zero if any of DS, ES or SS has a non-zero base */
112 int f_st; /* currently unused */
113 int vm86; /* vm86 mode */
114 int cpl;
115 int iopl;
116 int tf; /* TF cpu flag */
117 int singlestep_enabled; /* "hardware" single step enabled */
118 int jmp_opt; /* use direct block chaining for direct jumps */
119 int repz_opt; /* optimize jumps within repz instructions */
120 int mem_index; /* select memory access functions */
121 uint64_t flags; /* all execution flags */
122 struct TranslationBlock *tb;
123 int popl_esp_hack; /* for correct popl with esp base handling */
124 int rip_offset; /* only used in x86_64, but left for simplicity */
125 int cpuid_features;
126 int cpuid_ext_features;
127 int cpuid_ext2_features;
128 int cpuid_ext3_features;
129 int cpuid_7_0_ebx_features;
130 } DisasContext;
131
132 static void gen_eob(DisasContext *s);
133 static void gen_jmp(DisasContext *s, target_ulong eip);
134 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
135 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
136
137 /* i386 arith/logic operations */
138 enum {
139 OP_ADDL,
140 OP_ORL,
141 OP_ADCL,
142 OP_SBBL,
143 OP_ANDL,
144 OP_SUBL,
145 OP_XORL,
146 OP_CMPL,
147 };
148
149 /* i386 shift ops */
150 enum {
151 OP_ROL,
152 OP_ROR,
153 OP_RCL,
154 OP_RCR,
155 OP_SHL,
156 OP_SHR,
157 OP_SHL1, /* undocumented */
158 OP_SAR = 7,
159 };
160
161 enum {
162 JCC_O,
163 JCC_B,
164 JCC_Z,
165 JCC_BE,
166 JCC_S,
167 JCC_P,
168 JCC_L,
169 JCC_LE,
170 };
171
172 enum {
173 /* I386 int registers */
174 OR_EAX, /* MUST be even numbered */
175 OR_ECX,
176 OR_EDX,
177 OR_EBX,
178 OR_ESP,
179 OR_EBP,
180 OR_ESI,
181 OR_EDI,
182
183 OR_TMP0 = 16, /* temporary operand register */
184 OR_TMP1,
185 OR_A0, /* temporary register used when doing address evaluation */
186 };
187
188 enum {
189 USES_CC_DST = 1,
190 USES_CC_SRC = 2,
191 USES_CC_SRC2 = 4,
192 USES_CC_SRCT = 8,
193 };
194
195 /* Bit set if the global variable is live after setting CC_OP to X. */
196 static const uint8_t cc_op_live[CC_OP_NB] = {
197 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
198 [CC_OP_EFLAGS] = USES_CC_SRC,
199 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
200 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
201 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
202 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
203 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
204 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
205 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
206 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
207 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
208 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
209 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
210 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
211 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
212 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
213 [CC_OP_CLR] = 0,
214 };
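/* Illustrative note (added): set_cc_op() below consults this table when the
   CC_OP changes; e.g. going from CC_OP_SUBB (DST, SRC and SRCT live) to
   CC_OP_LOGICB (only DST live) lets cpu_cc_src and cpu_cc_srcT be discarded. */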
215
216 static void set_cc_op(DisasContext *s, CCOp op)
217 {
218 int dead;
219
220 if (s->cc_op == op) {
221 return;
222 }
223
224 /* Discard CC computation that will no longer be used. */
225 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
226 if (dead & USES_CC_DST) {
227 tcg_gen_discard_tl(cpu_cc_dst);
228 }
229 if (dead & USES_CC_SRC) {
230 tcg_gen_discard_tl(cpu_cc_src);
231 }
232 if (dead & USES_CC_SRC2) {
233 tcg_gen_discard_tl(cpu_cc_src2);
234 }
235 if (dead & USES_CC_SRCT) {
236 tcg_gen_discard_tl(cpu_cc_srcT);
237 }
238
239 if (op == CC_OP_DYNAMIC) {
240 /* The DYNAMIC setting is translator only, and should never be
241 stored. Thus we always consider it clean. */
242 s->cc_op_dirty = false;
243 } else {
244 /* Discard any computed CC_OP value (see shifts). */
245 if (s->cc_op == CC_OP_DYNAMIC) {
246 tcg_gen_discard_i32(cpu_cc_op);
247 }
248 s->cc_op_dirty = true;
249 }
250 s->cc_op = op;
251 }
252
253 static void gen_update_cc_op(DisasContext *s)
254 {
255 if (s->cc_op_dirty) {
256 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
257 s->cc_op_dirty = false;
258 }
259 }
260
261 #ifdef TARGET_X86_64
262
263 #define NB_OP_SIZES 4
264
265 #else /* !TARGET_X86_64 */
266
267 #define NB_OP_SIZES 3
268
269 #endif /* !TARGET_X86_64 */
270
271 #if defined(HOST_WORDS_BIGENDIAN)
272 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
273 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
274 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
275 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
276 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
277 #else
278 #define REG_B_OFFSET 0
279 #define REG_H_OFFSET 1
280 #define REG_W_OFFSET 0
281 #define REG_L_OFFSET 0
282 #define REG_LH_OFFSET 4
283 #endif
284
285 /* In instruction encodings for byte register accesses the
286 * register number usually indicates "low 8 bits of register N";
287 * however there are some special cases where N = 4..7 indicates
288 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
289 * true for this special case, false otherwise.
290 */
291 static inline bool byte_reg_is_xH(int reg)
292 {
293 if (reg < 4) {
294 return false;
295 }
296 #ifdef TARGET_X86_64
297 if (reg >= 8 || x86_64_hregs) {
298 return false;
299 }
300 #endif
301 return true;
302 }
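/* Example (added for clarity): without a REX prefix, reg = 4 in a byte-sized
   access names AH (bits 15..8 of EAX), so byte_reg_is_xH() returns true;
   with a REX prefix (which sets x86_64_hregs) the same encoding names SPL,
   so the function returns false. */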
303
304 /* Select the size of a push/pop operation. */
305 static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
306 {
307 if (CODE64(s)) {
308 return ot == MO_16 ? MO_16 : MO_64;
309 } else {
310 return ot;
311 }
312 }
313
314 /* Select the size of the stack pointer. */
315 static inline TCGMemOp mo_stacksize(DisasContext *s)
316 {
317 return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
318 }
319
320 /* Select only size 64 else 32. Used for SSE operand sizes. */
321 static inline TCGMemOp mo_64_32(TCGMemOp ot)
322 {
323 #ifdef TARGET_X86_64
324 return ot == MO_64 ? MO_64 : MO_32;
325 #else
326 return MO_32;
327 #endif
328 }
329
330 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
331 byte vs word opcodes. */
332 static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
333 {
334 return b & 1 ? ot : MO_8;
335 }
336
337 /* Select size 8 if lsb of B is clear, else OT capped at 32.
338 Used for decoding operand size of port opcodes. */
339 static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
340 {
341 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
342 }
343
344 static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
345 {
346 switch(ot) {
347 case MO_8:
348 if (!byte_reg_is_xH(reg)) {
349 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
350 } else {
351 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
352 }
353 break;
354 case MO_16:
355 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
356 break;
357 case MO_32:
358 /* For x86_64, this sets the higher half of register to zero.
359 For i386, this is equivalent to a mov. */
360 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
361 break;
362 #ifdef TARGET_X86_64
363 case MO_64:
364 tcg_gen_mov_tl(cpu_regs[reg], t0);
365 break;
366 #endif
367 default:
368 tcg_abort();
369 }
370 }
371
372 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
373 {
374 if (ot == MO_8 && byte_reg_is_xH(reg)) {
375 tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
376 tcg_gen_ext8u_tl(t0, t0);
377 } else {
378 tcg_gen_mov_tl(t0, cpu_regs[reg]);
379 }
380 }
381
382 static void gen_add_A0_im(DisasContext *s, int val)
383 {
384 tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
385 if (!CODE64(s)) {
386 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
387 }
388 }
389
390 static inline void gen_op_jmp_v(TCGv dest)
391 {
392 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
393 }
394
395 static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
396 {
397 tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
398 gen_op_mov_reg_v(size, reg, cpu_tmp0);
399 }
400
401 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
402 {
403 tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
404 gen_op_mov_reg_v(size, reg, cpu_tmp0);
405 }
406
407 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
408 {
409 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
410 }
411
412 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
413 {
414 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
415 }
416
417 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
418 {
419 if (d == OR_TMP0) {
420 gen_op_st_v(s, idx, cpu_T0, cpu_A0);
421 } else {
422 gen_op_mov_reg_v(idx, d, cpu_T0);
423 }
424 }
425
426 static inline void gen_jmp_im(target_ulong pc)
427 {
428 tcg_gen_movi_tl(cpu_tmp0, pc);
429 gen_op_jmp_v(cpu_tmp0);
430 }
431
432 /* Compute SEG:REG into A0. SEG is selected from the override segment
433 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
434 indicate no override. */
435 static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
436 int def_seg, int ovr_seg)
437 {
438 switch (aflag) {
439 #ifdef TARGET_X86_64
440 case MO_64:
441 if (ovr_seg < 0) {
442 tcg_gen_mov_tl(cpu_A0, a0);
443 return;
444 }
445 break;
446 #endif
447 case MO_32:
448 /* 32 bit address */
449 if (ovr_seg < 0) {
450 if (s->addseg) {
451 ovr_seg = def_seg;
452 } else {
453 tcg_gen_ext32u_tl(cpu_A0, a0);
454 return;
455 }
456 }
457 break;
458 case MO_16:
459 /* 16 bit address */
460 if (ovr_seg < 0) {
461 ovr_seg = def_seg;
462 }
463 tcg_gen_ext16u_tl(cpu_A0, a0);
464 /* ADDSEG will only be false in 16-bit mode for LEA. */
465 if (!s->addseg) {
466 return;
467 }
468 a0 = cpu_A0;
469 break;
470 default:
471 tcg_abort();
472 }
473
474 if (ovr_seg >= 0) {
475 TCGv seg = cpu_seg_base[ovr_seg];
476
477 if (aflag == MO_64) {
478 tcg_gen_add_tl(cpu_A0, a0, seg);
479 } else if (CODE64(s)) {
480 tcg_gen_ext32u_tl(cpu_A0, a0);
481 tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
482 } else {
483 tcg_gen_add_tl(cpu_A0, a0, seg);
484 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
485 }
486 }
487 }
488
489 static inline void gen_string_movl_A0_ESI(DisasContext *s)
490 {
491 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
492 }
493
494 static inline void gen_string_movl_A0_EDI(DisasContext *s)
495 {
496 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
497 }
498
499 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
500 {
501 tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
502 tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
503 };
504
505 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
506 {
507 switch (size) {
508 case MO_8:
509 if (sign) {
510 tcg_gen_ext8s_tl(dst, src);
511 } else {
512 tcg_gen_ext8u_tl(dst, src);
513 }
514 return dst;
515 case MO_16:
516 if (sign) {
517 tcg_gen_ext16s_tl(dst, src);
518 } else {
519 tcg_gen_ext16u_tl(dst, src);
520 }
521 return dst;
522 #ifdef TARGET_X86_64
523 case MO_32:
524 if (sign) {
525 tcg_gen_ext32s_tl(dst, src);
526 } else {
527 tcg_gen_ext32u_tl(dst, src);
528 }
529 return dst;
530 #endif
531 default:
532 return src;
533 }
534 }
535
536 static void gen_extu(TCGMemOp ot, TCGv reg)
537 {
538 gen_ext_tl(reg, reg, ot, false);
539 }
540
541 static void gen_exts(TCGMemOp ot, TCGv reg)
542 {
543 gen_ext_tl(reg, reg, ot, true);
544 }
545
546 static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
547 {
548 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
549 gen_extu(size, cpu_tmp0);
550 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
551 }
552
553 static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
554 {
555 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
556 gen_extu(size, cpu_tmp0);
557 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
558 }
559
560 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
561 {
562 switch (ot) {
563 case MO_8:
564 gen_helper_inb(v, cpu_env, n);
565 break;
566 case MO_16:
567 gen_helper_inw(v, cpu_env, n);
568 break;
569 case MO_32:
570 gen_helper_inl(v, cpu_env, n);
571 break;
572 default:
573 tcg_abort();
574 }
575 }
576
577 static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
578 {
579 switch (ot) {
580 case MO_8:
581 gen_helper_outb(cpu_env, v, n);
582 break;
583 case MO_16:
584 gen_helper_outw(cpu_env, v, n);
585 break;
586 case MO_32:
587 gen_helper_outl(cpu_env, v, n);
588 break;
589 default:
590 tcg_abort();
591 }
592 }
593
594 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
595 uint32_t svm_flags)
596 {
597 target_ulong next_eip;
598
599 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
600 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
601 switch (ot) {
602 case MO_8:
603 gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
604 break;
605 case MO_16:
606 gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
607 break;
608 case MO_32:
609 gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
610 break;
611 default:
612 tcg_abort();
613 }
614 }
615 if (s->flags & HF_SVMI_MASK) {
616 gen_update_cc_op(s);
617 gen_jmp_im(cur_eip);
618 svm_flags |= (1 << (4 + ot));
619 next_eip = s->pc - s->cs_base;
620 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
621 gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
622 tcg_const_i32(svm_flags),
623 tcg_const_i32(next_eip - cur_eip));
624 }
625 }
626
627 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
628 {
629 gen_string_movl_A0_ESI(s);
630 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
631 gen_string_movl_A0_EDI(s);
632 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
633 gen_op_movl_T0_Dshift(ot);
634 gen_op_add_reg_T0(s->aflag, R_ESI);
635 gen_op_add_reg_T0(s->aflag, R_EDI);
636 }
637
638 static void gen_op_update1_cc(void)
639 {
640 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
641 }
642
643 static void gen_op_update2_cc(void)
644 {
645 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
646 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
647 }
648
649 static void gen_op_update3_cc(TCGv reg)
650 {
651 tcg_gen_mov_tl(cpu_cc_src2, reg);
652 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
653 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
654 }
655
656 static inline void gen_op_testl_T0_T1_cc(void)
657 {
658 tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
659 }
660
661 static void gen_op_update_neg_cc(void)
662 {
663 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
664 tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
665 tcg_gen_movi_tl(cpu_cc_srcT, 0);
666 }
667
668 /* compute all eflags to cc_src */
669 static void gen_compute_eflags(DisasContext *s)
670 {
671 TCGv zero, dst, src1, src2;
672 int live, dead;
673
674 if (s->cc_op == CC_OP_EFLAGS) {
675 return;
676 }
677 if (s->cc_op == CC_OP_CLR) {
678 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
679 set_cc_op(s, CC_OP_EFLAGS);
680 return;
681 }
682
683 TCGV_UNUSED(zero);
684 dst = cpu_cc_dst;
685 src1 = cpu_cc_src;
686 src2 = cpu_cc_src2;
687
688 /* Take care to not read values that are not live. */
689 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
690 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
691 if (dead) {
692 zero = tcg_const_tl(0);
693 if (dead & USES_CC_DST) {
694 dst = zero;
695 }
696 if (dead & USES_CC_SRC) {
697 src1 = zero;
698 }
699 if (dead & USES_CC_SRC2) {
700 src2 = zero;
701 }
702 }
703
704 gen_update_cc_op(s);
705 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
706 set_cc_op(s, CC_OP_EFLAGS);
707
708 if (dead) {
709 tcg_temp_free(zero);
710 }
711 }
712
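/* Descriptive note (added): a CCPrepare records how to test a condition
   without materialising EFLAGS: compare (reg & mask) -- or reg directly when
   mask == -1 -- against either reg2 (when use_reg2 is set) or the constant
   imm, using cond.  no_setcond means reg already holds the boolean result,
   so no setcond operation is needed. */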
713 typedef struct CCPrepare {
714 TCGCond cond;
715 TCGv reg;
716 TCGv reg2;
717 target_ulong imm;
718 target_ulong mask;
719 bool use_reg2;
720 bool no_setcond;
721 } CCPrepare;
722
723 /* compute eflags.C to reg */
724 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
725 {
726 TCGv t0, t1;
727 int size, shift;
728
729 switch (s->cc_op) {
730 case CC_OP_SUBB ... CC_OP_SUBQ:
731 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
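            /* (Added note: after dst = srcT - src, the carry flag is the
               unsigned borrow, i.e. it is set exactly when srcT < src.) */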
732 size = s->cc_op - CC_OP_SUBB;
733 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
734 /* If no temporary was used, be careful not to alias t1 and t0. */
735 t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
736 tcg_gen_mov_tl(t0, cpu_cc_srcT);
737 gen_extu(size, t0);
738 goto add_sub;
739
740 case CC_OP_ADDB ... CC_OP_ADDQ:
741 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
742 size = s->cc_op - CC_OP_ADDB;
743 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
744 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
745 add_sub:
746 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
747 .reg2 = t1, .mask = -1, .use_reg2 = true };
748
749 case CC_OP_LOGICB ... CC_OP_LOGICQ:
750 case CC_OP_CLR:
751 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
752
753 case CC_OP_INCB ... CC_OP_INCQ:
754 case CC_OP_DECB ... CC_OP_DECQ:
755 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
756 .mask = -1, .no_setcond = true };
757
758 case CC_OP_SHLB ... CC_OP_SHLQ:
759 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
760 size = s->cc_op - CC_OP_SHLB;
761 shift = (8 << size) - 1;
762 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
763 .mask = (target_ulong)1 << shift };
764
765 case CC_OP_MULB ... CC_OP_MULQ:
766 return (CCPrepare) { .cond = TCG_COND_NE,
767 .reg = cpu_cc_src, .mask = -1 };
768
769 case CC_OP_BMILGB ... CC_OP_BMILGQ:
770 size = s->cc_op - CC_OP_BMILGB;
771 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
772 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
773
774 case CC_OP_ADCX:
775 case CC_OP_ADCOX:
776 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
777 .mask = -1, .no_setcond = true };
778
779 case CC_OP_EFLAGS:
780 case CC_OP_SARB ... CC_OP_SARQ:
781 /* CC_SRC & 1 */
782 return (CCPrepare) { .cond = TCG_COND_NE,
783 .reg = cpu_cc_src, .mask = CC_C };
784
785 default:
786 /* The need to compute only C from CC_OP_DYNAMIC is important
787 in efficiently implementing e.g. INC at the start of a TB. */
788 gen_update_cc_op(s);
789 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
790 cpu_cc_src2, cpu_cc_op);
791 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
792 .mask = -1, .no_setcond = true };
793 }
794 }
795
796 /* compute eflags.P to reg */
797 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
798 {
799 gen_compute_eflags(s);
800 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
801 .mask = CC_P };
802 }
803
804 /* compute eflags.S to reg */
805 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
806 {
807 switch (s->cc_op) {
808 case CC_OP_DYNAMIC:
809 gen_compute_eflags(s);
810 /* FALLTHRU */
811 case CC_OP_EFLAGS:
812 case CC_OP_ADCX:
813 case CC_OP_ADOX:
814 case CC_OP_ADCOX:
815 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
816 .mask = CC_S };
817 case CC_OP_CLR:
818 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
819 default:
820 {
821 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
822 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
823 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
824 }
825 }
826 }
827
828 /* compute eflags.O to reg */
829 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
830 {
831 switch (s->cc_op) {
832 case CC_OP_ADOX:
833 case CC_OP_ADCOX:
834 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
835 .mask = -1, .no_setcond = true };
836 case CC_OP_CLR:
837 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
838 default:
839 gen_compute_eflags(s);
840 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
841 .mask = CC_O };
842 }
843 }
844
845 /* compute eflags.Z to reg */
846 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
847 {
848 switch (s->cc_op) {
849 case CC_OP_DYNAMIC:
850 gen_compute_eflags(s);
851 /* FALLTHRU */
852 case CC_OP_EFLAGS:
853 case CC_OP_ADCX:
854 case CC_OP_ADOX:
855 case CC_OP_ADCOX:
856 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
857 .mask = CC_Z };
858 case CC_OP_CLR:
859 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
860 default:
861 {
862 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
863 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
864 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
865 }
866 }
867 }
868
869 /* perform a conditional store into register 'reg' according to jump opcode
870 value 'b'. In the fast case, T0 is guaranteed not to be used. */
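/* (Added example: the low bits of the Jcc/SETcc opcode select the test --
   b & 1 inverts it and (b >> 1) & 7 picks the condition; e.g. JNE
   (opcode 0x75) yields inv = 1 and jcc_op = JCC_Z.) */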
871 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
872 {
873 int inv, jcc_op, cond;
874 TCGMemOp size;
875 CCPrepare cc;
876 TCGv t0;
877
878 inv = b & 1;
879 jcc_op = (b >> 1) & 7;
880
881 switch (s->cc_op) {
882 case CC_OP_SUBB ... CC_OP_SUBQ:
883 /* We optimize relational operators for the cmp/jcc case. */
884 size = s->cc_op - CC_OP_SUBB;
885 switch (jcc_op) {
886 case JCC_BE:
887 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
888 gen_extu(size, cpu_tmp4);
889 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
890 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
891 .reg2 = t0, .mask = -1, .use_reg2 = true };
892 break;
893
894 case JCC_L:
895 cond = TCG_COND_LT;
896 goto fast_jcc_l;
897 case JCC_LE:
898 cond = TCG_COND_LE;
899 fast_jcc_l:
900 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
901 gen_exts(size, cpu_tmp4);
902 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
903 cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
904 .reg2 = t0, .mask = -1, .use_reg2 = true };
905 break;
906
907 default:
908 goto slow_jcc;
909 }
910 break;
911
912 default:
913 slow_jcc:
914 /* This actually generates good code for JC, JZ and JS. */
915 switch (jcc_op) {
916 case JCC_O:
917 cc = gen_prepare_eflags_o(s, reg);
918 break;
919 case JCC_B:
920 cc = gen_prepare_eflags_c(s, reg);
921 break;
922 case JCC_Z:
923 cc = gen_prepare_eflags_z(s, reg);
924 break;
925 case JCC_BE:
926 gen_compute_eflags(s);
927 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
928 .mask = CC_Z | CC_C };
929 break;
930 case JCC_S:
931 cc = gen_prepare_eflags_s(s, reg);
932 break;
933 case JCC_P:
934 cc = gen_prepare_eflags_p(s, reg);
935 break;
936 case JCC_L:
937 gen_compute_eflags(s);
938 if (TCGV_EQUAL(reg, cpu_cc_src)) {
939 reg = cpu_tmp0;
940 }
941 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
942 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
943 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
944 .mask = CC_S };
945 break;
946 default:
947 case JCC_LE:
948 gen_compute_eflags(s);
949 if (TCGV_EQUAL(reg, cpu_cc_src)) {
950 reg = cpu_tmp0;
951 }
952 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
953 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
954 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
955 .mask = CC_S | CC_Z };
956 break;
957 }
958 break;
959 }
960
961 if (inv) {
962 cc.cond = tcg_invert_cond(cc.cond);
963 }
964 return cc;
965 }
966
967 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
968 {
969 CCPrepare cc = gen_prepare_cc(s, b, reg);
970
971 if (cc.no_setcond) {
972 if (cc.cond == TCG_COND_EQ) {
973 tcg_gen_xori_tl(reg, cc.reg, 1);
974 } else {
975 tcg_gen_mov_tl(reg, cc.reg);
976 }
977 return;
978 }
979
980 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
981 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
982 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
983 tcg_gen_andi_tl(reg, reg, 1);
984 return;
985 }
986 if (cc.mask != -1) {
987 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
988 cc.reg = reg;
989 }
990 if (cc.use_reg2) {
991 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
992 } else {
993 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
994 }
995 }
996
997 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
998 {
999 gen_setcc1(s, JCC_B << 1, reg);
1000 }
1001
1002 /* generate a conditional jump to label 'l1' according to jump opcode
1003 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1004 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1005 {
1006 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1007
1008 if (cc.mask != -1) {
1009 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1010 cc.reg = cpu_T0;
1011 }
1012 if (cc.use_reg2) {
1013 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1014 } else {
1015 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1016 }
1017 }
1018
1019 /* Generate a conditional jump to label 'l1' according to jump opcode
1020 value 'b'. In the fast case, T0 is guaranteed not to be used.
1021 A translation block must end soon. */
1022 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1023 {
1024 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1025
1026 gen_update_cc_op(s);
1027 if (cc.mask != -1) {
1028 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1029 cc.reg = cpu_T0;
1030 }
1031 set_cc_op(s, CC_OP_DYNAMIC);
1032 if (cc.use_reg2) {
1033 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1034 } else {
1035 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1036 }
1037 }
1038
1039 /* XXX: does not work with gdbstub "ice" single step - not a
1040 serious problem */
1041 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1042 {
1043 TCGLabel *l1 = gen_new_label();
1044 TCGLabel *l2 = gen_new_label();
1045 gen_op_jnz_ecx(s->aflag, l1);
1046 gen_set_label(l2);
1047 gen_jmp_tb(s, next_eip, 1);
1048 gen_set_label(l1);
1049 return l2;
1050 }
1051
1052 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1053 {
1054 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
1055 gen_string_movl_A0_EDI(s);
1056 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1057 gen_op_movl_T0_Dshift(ot);
1058 gen_op_add_reg_T0(s->aflag, R_EDI);
1059 }
1060
1061 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1062 {
1063 gen_string_movl_A0_ESI(s);
1064 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1065 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
1066 gen_op_movl_T0_Dshift(ot);
1067 gen_op_add_reg_T0(s->aflag, R_ESI);
1068 }
1069
1070 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1071 {
1072 gen_string_movl_A0_EDI(s);
1073 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1074 gen_op(s, OP_CMPL, ot, R_EAX);
1075 gen_op_movl_T0_Dshift(ot);
1076 gen_op_add_reg_T0(s->aflag, R_EDI);
1077 }
1078
1079 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1080 {
1081 gen_string_movl_A0_EDI(s);
1082 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1083 gen_string_movl_A0_ESI(s);
1084 gen_op(s, OP_CMPL, ot, OR_TMP0);
1085 gen_op_movl_T0_Dshift(ot);
1086 gen_op_add_reg_T0(s->aflag, R_ESI);
1087 gen_op_add_reg_T0(s->aflag, R_EDI);
1088 }
1089
1090 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1091 {
1092 if (s->flags & HF_IOBPT_MASK) {
1093 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1094 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1095
1096 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1097 tcg_temp_free_i32(t_size);
1098 tcg_temp_free(t_next);
1099 }
1100 }
1101
1102
1103 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1104 {
1105 if (s->tb->cflags & CF_USE_ICOUNT) {
1106 gen_io_start();
1107 }
1108 gen_string_movl_A0_EDI(s);
1109 /* Note: we must do this dummy write first to be restartable in
1110 case of page fault. */
1111 tcg_gen_movi_tl(cpu_T0, 0);
1112 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1113 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1114 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1115 gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
1116 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1117 gen_op_movl_T0_Dshift(ot);
1118 gen_op_add_reg_T0(s->aflag, R_EDI);
1119 gen_bpt_io(s, cpu_tmp2_i32, ot);
1120 if (s->tb->cflags & CF_USE_ICOUNT) {
1121 gen_io_end();
1122 }
1123 }
1124
1125 static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1126 {
1127 if (s->tb->cflags & CF_USE_ICOUNT) {
1128 gen_io_start();
1129 }
1130 gen_string_movl_A0_ESI(s);
1131 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1132
1133 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1134 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1135 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
1136 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1137 gen_op_movl_T0_Dshift(ot);
1138 gen_op_add_reg_T0(s->aflag, R_ESI);
1139 gen_bpt_io(s, cpu_tmp2_i32, ot);
1140 if (s->tb->cflags & CF_USE_ICOUNT) {
1141 gen_io_end();
1142 }
1143 }
1144
1145 /* same method as Valgrind: we generate jumps to current or next
1146 instruction */
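/* (Added sketch of the scheme as implemented below: each iteration of a
   REP-prefixed string instruction is translated on its own -- perform one
   step, decrement ECX, then jump back to the current instruction; when ECX
   reaches zero (or, for REPZ/REPNZ, when ZF no longer matches) control
   jumps to the next instruction instead.) */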
1147 #define GEN_REPZ(op) \
1148 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1149 target_ulong cur_eip, target_ulong next_eip) \
1150 { \
1151 TCGLabel *l2; \
1152 gen_update_cc_op(s); \
1153 l2 = gen_jz_ecx_string(s, next_eip); \
1154 gen_ ## op(s, ot); \
1155 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1156 /* a loop would cause two single step exceptions if ECX = 1 \
1157 before rep string_insn */ \
1158 if (s->repz_opt) \
1159 gen_op_jz_ecx(s->aflag, l2); \
1160 gen_jmp(s, cur_eip); \
1161 }
1162
1163 #define GEN_REPZ2(op) \
1164 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1165 target_ulong cur_eip, \
1166 target_ulong next_eip, \
1167 int nz) \
1168 { \
1169 TCGLabel *l2; \
1170 gen_update_cc_op(s); \
1171 l2 = gen_jz_ecx_string(s, next_eip); \
1172 gen_ ## op(s, ot); \
1173 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1174 gen_update_cc_op(s); \
1175 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1176 if (s->repz_opt) \
1177 gen_op_jz_ecx(s->aflag, l2); \
1178 gen_jmp(s, cur_eip); \
1179 }
1180
1181 GEN_REPZ(movs)
1182 GEN_REPZ(stos)
1183 GEN_REPZ(lods)
1184 GEN_REPZ(ins)
1185 GEN_REPZ(outs)
1186 GEN_REPZ2(scas)
1187 GEN_REPZ2(cmps)
1188
1189 static void gen_helper_fp_arith_ST0_FT0(int op)
1190 {
1191 switch (op) {
1192 case 0:
1193 gen_helper_fadd_ST0_FT0(cpu_env);
1194 break;
1195 case 1:
1196 gen_helper_fmul_ST0_FT0(cpu_env);
1197 break;
1198 case 2:
1199 gen_helper_fcom_ST0_FT0(cpu_env);
1200 break;
1201 case 3:
1202 gen_helper_fcom_ST0_FT0(cpu_env);
1203 break;
1204 case 4:
1205 gen_helper_fsub_ST0_FT0(cpu_env);
1206 break;
1207 case 5:
1208 gen_helper_fsubr_ST0_FT0(cpu_env);
1209 break;
1210 case 6:
1211 gen_helper_fdiv_ST0_FT0(cpu_env);
1212 break;
1213 case 7:
1214 gen_helper_fdivr_ST0_FT0(cpu_env);
1215 break;
1216 }
1217 }
1218
1219 /* NOTE the exception in "r" op ordering */
1220 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1221 {
1222 TCGv_i32 tmp = tcg_const_i32(opreg);
1223 switch (op) {
1224 case 0:
1225 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1226 break;
1227 case 1:
1228 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1229 break;
1230 case 4:
1231 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1232 break;
1233 case 5:
1234 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1235 break;
1236 case 6:
1237 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1238 break;
1239 case 7:
1240 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1241 break;
1242 }
1243 }
1244
1245 /* if d == OR_TMP0, it means memory operand (address in A0) */
1246 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1247 {
1248 if (d != OR_TMP0) {
1249 gen_op_mov_v_reg(ot, cpu_T0, d);
1250 } else {
1251 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1252 }
1253 switch(op) {
1254 case OP_ADCL:
1255 gen_compute_eflags_c(s1, cpu_tmp4);
1256 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1257 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
1258 gen_op_st_rm_T0_A0(s1, ot, d);
1259 gen_op_update3_cc(cpu_tmp4);
1260 set_cc_op(s1, CC_OP_ADCB + ot);
1261 break;
1262 case OP_SBBL:
1263 gen_compute_eflags_c(s1, cpu_tmp4);
1264 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1265 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
1266 gen_op_st_rm_T0_A0(s1, ot, d);
1267 gen_op_update3_cc(cpu_tmp4);
1268 set_cc_op(s1, CC_OP_SBBB + ot);
1269 break;
1270 case OP_ADDL:
1271 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1272 gen_op_st_rm_T0_A0(s1, ot, d);
1273 gen_op_update2_cc();
1274 set_cc_op(s1, CC_OP_ADDB + ot);
1275 break;
1276 case OP_SUBL:
1277 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1278 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1279 gen_op_st_rm_T0_A0(s1, ot, d);
1280 gen_op_update2_cc();
1281 set_cc_op(s1, CC_OP_SUBB + ot);
1282 break;
1283 default:
1284 case OP_ANDL:
1285 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
1286 gen_op_st_rm_T0_A0(s1, ot, d);
1287 gen_op_update1_cc();
1288 set_cc_op(s1, CC_OP_LOGICB + ot);
1289 break;
1290 case OP_ORL:
1291 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1292 gen_op_st_rm_T0_A0(s1, ot, d);
1293 gen_op_update1_cc();
1294 set_cc_op(s1, CC_OP_LOGICB + ot);
1295 break;
1296 case OP_XORL:
1297 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
1298 gen_op_st_rm_T0_A0(s1, ot, d);
1299 gen_op_update1_cc();
1300 set_cc_op(s1, CC_OP_LOGICB + ot);
1301 break;
1302 case OP_CMPL:
1303 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
1304 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1305 tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
1306 set_cc_op(s1, CC_OP_SUBB + ot);
1307 break;
1308 }
1309 }
1310
1311 /* if d == OR_TMP0, it means memory operand (address in A0) */
1312 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1313 {
1314 if (d != OR_TMP0) {
1315 gen_op_mov_v_reg(ot, cpu_T0, d);
1316 } else {
1317 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1318 }
1319 gen_compute_eflags_c(s1, cpu_cc_src);
1320 if (c > 0) {
1321 tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
1322 set_cc_op(s1, CC_OP_INCB + ot);
1323 } else {
1324 tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
1325 set_cc_op(s1, CC_OP_DECB + ot);
1326 }
1327 gen_op_st_rm_T0_A0(s1, ot, d);
1328 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1329 }
1330
1331 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1332 TCGv shm1, TCGv count, bool is_right)
1333 {
1334 TCGv_i32 z32, s32, oldop;
1335 TCGv z_tl;
1336
1337 /* Store the results into the CC variables. If we know that the
1338 variable must be dead, store unconditionally. Otherwise we'll
1339 need to not disrupt the current contents. */
1340 z_tl = tcg_const_tl(0);
1341 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1342 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1343 result, cpu_cc_dst);
1344 } else {
1345 tcg_gen_mov_tl(cpu_cc_dst, result);
1346 }
1347 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1348 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1349 shm1, cpu_cc_src);
1350 } else {
1351 tcg_gen_mov_tl(cpu_cc_src, shm1);
1352 }
1353 tcg_temp_free(z_tl);
1354
1355 /* Get the two potential CC_OP values into temporaries. */
1356 tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1357 if (s->cc_op == CC_OP_DYNAMIC) {
1358 oldop = cpu_cc_op;
1359 } else {
1360 tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1361 oldop = cpu_tmp3_i32;
1362 }
1363
1364 /* Conditionally store the CC_OP value. */
1365 z32 = tcg_const_i32(0);
1366 s32 = tcg_temp_new_i32();
1367 tcg_gen_trunc_tl_i32(s32, count);
1368 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1369 tcg_temp_free_i32(z32);
1370 tcg_temp_free_i32(s32);
1371
1372 /* The CC_OP value is no longer predictable. */
1373 set_cc_op(s, CC_OP_DYNAMIC);
1374 }
1375
1376 static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1377 int is_right, int is_arith)
1378 {
1379 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1380
1381 /* load */
1382 if (op1 == OR_TMP0) {
1383 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1384 } else {
1385 gen_op_mov_v_reg(ot, cpu_T0, op1);
1386 }
1387
1388 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1389 tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
1390
1391 if (is_right) {
1392 if (is_arith) {
1393 gen_exts(ot, cpu_T0);
1394 tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1395 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
1396 } else {
1397 gen_extu(ot, cpu_T0);
1398 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1399 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
1400 }
1401 } else {
1402 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1403 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
1404 }
1405
1406 /* store */
1407 gen_op_st_rm_T0_A0(s, ot, op1);
1408
1409 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
1410 }
1411
1412 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1413 int is_right, int is_arith)
1414 {
1415 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1416
1417 /* load */
1418 if (op1 == OR_TMP0)
1419 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1420 else
1421 gen_op_mov_v_reg(ot, cpu_T0, op1);
1422
1423 op2 &= mask;
1424 if (op2 != 0) {
1425 if (is_right) {
1426 if (is_arith) {
1427 gen_exts(ot, cpu_T0);
1428 tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
1429 tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
1430 } else {
1431 gen_extu(ot, cpu_T0);
1432 tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
1433 tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
1434 }
1435 } else {
1436 tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
1437 tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
1438 }
1439 }
1440
1441 /* store */
1442 gen_op_st_rm_T0_A0(s, ot, op1);
1443
1444 /* update eflags if non zero shift */
1445 if (op2 != 0) {
1446 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1447 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1448 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1449 }
1450 }
1451
1452 static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1453 {
1454 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1455 TCGv_i32 t0, t1;
1456
1457 /* load */
1458 if (op1 == OR_TMP0) {
1459 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1460 } else {
1461 gen_op_mov_v_reg(ot, cpu_T0, op1);
1462 }
1463
1464 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1465
1466 switch (ot) {
1467 case MO_8:
1468 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1469 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
1470 tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
1471 goto do_long;
1472 case MO_16:
1473 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1474 tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
1475 goto do_long;
1476 do_long:
1477 #ifdef TARGET_X86_64
1478 case MO_32:
1479 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1480 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
1481 if (is_right) {
1482 tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1483 } else {
1484 tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1485 }
1486 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1487 break;
1488 #endif
1489 default:
1490 if (is_right) {
1491 tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
1492 } else {
1493 tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
1494 }
1495 break;
1496 }
1497
1498 /* store */
1499 gen_op_st_rm_T0_A0(s, ot, op1);
1500
1501 /* We'll need the flags computed into CC_SRC. */
1502 gen_compute_eflags(s);
1503
1504 /* The value that was "rotated out" is now present at the other end
1505 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1506 since we've computed the flags into CC_SRC, these variables are
1507 currently dead. */
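        /* (Added example: for a left rotate, the bit rotated out of the top
           reappears as the new lsb and becomes CF; for a right rotate, the
           bit rotated out of the bottom reappears as the new msb and becomes
           CF.  OF is then derived from CF and the adjacent top bit below.) */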
1508 if (is_right) {
1509 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1510 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1511 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1512 } else {
1513 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1514 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1515 }
1516 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1517 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1518
1519 /* Now conditionally store the new CC_OP value. If the shift count
1520 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1521 Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
1522 exactly as we computed above. */
1523 t0 = tcg_const_i32(0);
1524 t1 = tcg_temp_new_i32();
1525 tcg_gen_trunc_tl_i32(t1, cpu_T1);
1526 tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
1527 tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1528 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1529 cpu_tmp2_i32, cpu_tmp3_i32);
1530 tcg_temp_free_i32(t0);
1531 tcg_temp_free_i32(t1);
1532
1533 /* The CC_OP value is no longer predictable. */
1534 set_cc_op(s, CC_OP_DYNAMIC);
1535 }
1536
1537 static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1538 int is_right)
1539 {
1540 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1541 int shift;
1542
1543 /* load */
1544 if (op1 == OR_TMP0) {
1545 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1546 } else {
1547 gen_op_mov_v_reg(ot, cpu_T0, op1);
1548 }
1549
1550 op2 &= mask;
1551 if (op2 != 0) {
1552 switch (ot) {
1553 #ifdef TARGET_X86_64
1554 case MO_32:
1555 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1556 if (is_right) {
1557 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1558 } else {
1559 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1560 }
1561 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1562 break;
1563 #endif
1564 default:
1565 if (is_right) {
1566 tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1567 } else {
1568 tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1569 }
1570 break;
1571 case MO_8:
1572 mask = 7;
1573 goto do_shifts;
1574 case MO_16:
1575 mask = 15;
1576 do_shifts:
1577 shift = op2 & mask;
1578 if (is_right) {
1579 shift = mask + 1 - shift;
1580 }
1581 gen_extu(ot, cpu_T0);
1582 tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1583 tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1584 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1585 break;
1586 }
1587 }
1588
1589 /* store */
1590 gen_op_st_rm_T0_A0(s, ot, op1);
1591
1592 if (op2 != 0) {
1593 /* Compute the flags into CC_SRC. */
1594 gen_compute_eflags(s);
1595
1596 /* The value that was "rotated out" is now present at the other end
1597 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1598 since we've computed the flags into CC_SRC, these variables are
1599 currently dead. */
1600 if (is_right) {
1601 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1602 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1603 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1604 } else {
1605 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1606 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1607 }
1608 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1609 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1610 set_cc_op(s, CC_OP_ADCOX);
1611 }
1612 }
1613
1614 /* XXX: add faster immediate = 1 case */
1615 static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1616 int is_right)
1617 {
1618 gen_compute_eflags(s);
1619 assert(s->cc_op == CC_OP_EFLAGS);
1620
1621 /* load */
1622 if (op1 == OR_TMP0)
1623 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1624 else
1625 gen_op_mov_v_reg(ot, cpu_T0, op1);
1626
1627 if (is_right) {
1628 switch (ot) {
1629 case MO_8:
1630 gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1631 break;
1632 case MO_16:
1633 gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1634 break;
1635 case MO_32:
1636 gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1637 break;
1638 #ifdef TARGET_X86_64
1639 case MO_64:
1640 gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1641 break;
1642 #endif
1643 default:
1644 tcg_abort();
1645 }
1646 } else {
1647 switch (ot) {
1648 case MO_8:
1649 gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1650 break;
1651 case MO_16:
1652 gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1653 break;
1654 case MO_32:
1655 gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1656 break;
1657 #ifdef TARGET_X86_64
1658 case MO_64:
1659 gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1660 break;
1661 #endif
1662 default:
1663 tcg_abort();
1664 }
1665 }
1666 /* store */
1667 gen_op_st_rm_T0_A0(s, ot, op1);
1668 }
1669
1670 /* XXX: add faster immediate case */
1671 static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1672 bool is_right, TCGv count_in)
1673 {
1674 target_ulong mask = (ot == MO_64 ? 63 : 31);
1675 TCGv count;
1676
1677 /* load */
1678 if (op1 == OR_TMP0) {
1679 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1680 } else {
1681 gen_op_mov_v_reg(ot, cpu_T0, op1);
1682 }
1683
1684 count = tcg_temp_new();
1685 tcg_gen_andi_tl(count, count_in, mask);
1686
1687 switch (ot) {
1688 case MO_16:
1689 /* Note: we implement the Intel behaviour for shift count > 16.
1690 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1691 portion by constructing it as a 32-bit value. */
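        /* (Added illustration: with a count of 20, the 16-bit result is
           bits 20..35 of the 48-bit value A:B:A, i.e. the top 12 bits of B
           with the low 4 bits of A above them.) */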
1692 if (is_right) {
1693 tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1694 tcg_gen_mov_tl(cpu_T1, cpu_T0);
1695 tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1696 } else {
1697 tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1698 }
1699 /* FALLTHRU */
1700 #ifdef TARGET_X86_64
1701 case MO_32:
1702 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1703 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1704 if (is_right) {
1705 tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1706 tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1707 tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1708 } else {
1709 tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1710 tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1711 tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1712 tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1713 tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1714 }
1715 break;
1716 #endif
1717 default:
1718 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1719 if (is_right) {
1720 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1721
1722 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1723 tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1724 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1725 } else {
1726 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1727 if (ot == MO_16) {
1728 /* Only needed if count > 16, for Intel behaviour. */
1729 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1730 tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1731 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1732 }
1733
1734 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1735 tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1736 tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1737 }
1738 tcg_gen_movi_tl(cpu_tmp4, 0);
1739 tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1740 cpu_tmp4, cpu_T1);
1741 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1742 break;
1743 }
1744
1745 /* store */
1746 gen_op_st_rm_T0_A0(s, ot, op1);
1747
1748 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1749 tcg_temp_free(count);
1750 }
1751
1752 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1753 {
1754 if (s != OR_TMP1)
1755 gen_op_mov_v_reg(ot, cpu_T1, s);
1756 switch(op) {
1757 case OP_ROL:
1758 gen_rot_rm_T1(s1, ot, d, 0);
1759 break;
1760 case OP_ROR:
1761 gen_rot_rm_T1(s1, ot, d, 1);
1762 break;
1763 case OP_SHL:
1764 case OP_SHL1:
1765 gen_shift_rm_T1(s1, ot, d, 0, 0);
1766 break;
1767 case OP_SHR:
1768 gen_shift_rm_T1(s1, ot, d, 1, 0);
1769 break;
1770 case OP_SAR:
1771 gen_shift_rm_T1(s1, ot, d, 1, 1);
1772 break;
1773 case OP_RCL:
1774 gen_rotc_rm_T1(s1, ot, d, 0);
1775 break;
1776 case OP_RCR:
1777 gen_rotc_rm_T1(s1, ot, d, 1);
1778 break;
1779 }
1780 }
1781
1782 static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1783 {
1784 switch(op) {
1785 case OP_ROL:
1786 gen_rot_rm_im(s1, ot, d, c, 0);
1787 break;
1788 case OP_ROR:
1789 gen_rot_rm_im(s1, ot, d, c, 1);
1790 break;
1791 case OP_SHL:
1792 case OP_SHL1:
1793 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1794 break;
1795 case OP_SHR:
1796 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1797 break;
1798 case OP_SAR:
1799 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1800 break;
1801 default:
1802 /* currently not optimized */
1803 tcg_gen_movi_tl(cpu_T1, c);
1804 gen_shift(s1, op, ot, d, OR_TMP1);
1805 break;
1806 }
1807 }
1808
1809 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1810 {
1811 target_long disp;
1812 int havesib, base, index, scale;
1813 int mod, rm, code, def_seg, ovr_seg;
1814 TCGv sum;
1815
1816 def_seg = R_DS;
1817 ovr_seg = s->override;
1818 mod = (modrm >> 6) & 3;
1819 rm = modrm & 7;
1820
1821 switch (s->aflag) {
1822 case MO_64:
1823 case MO_32:
1824 havesib = 0;
1825 base = rm;
1826 index = -1;
1827 scale = 0;
1828
1829 if (base == 4) {
1830 havesib = 1;
1831 code = cpu_ldub_code(env, s->pc++);
1832 scale = (code >> 6) & 3;
1833 index = ((code >> 3) & 7) | REX_X(s);
1834 if (index == 4) {
1835 index = -1; /* no index */
1836 }
1837 base = (code & 7);
1838 }
1839 base |= REX_B(s);
1840
1841 switch (mod) {
1842 case 0:
1843 if ((base & 7) == 5) {
1844 base = -1;
1845 disp = (int32_t)cpu_ldl_code(env, s->pc);
1846 s->pc += 4;
1847 if (CODE64(s) && !havesib) {
1848 disp += s->pc + s->rip_offset;
1849 }
1850 } else {
1851 disp = 0;
1852 }
1853 break;
1854 case 1:
1855 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1856 break;
1857 default:
1858 case 2:
1859 disp = (int32_t)cpu_ldl_code(env, s->pc);
1860 s->pc += 4;
1861 break;
1862 }
1863
1864 /* For correct popl handling with esp. */
1865 if (base == R_ESP && s->popl_esp_hack) {
1866 disp += s->popl_esp_hack;
1867 }
1868
1869 /* Compute the address, with a minimum number of TCG ops. */
1870 TCGV_UNUSED(sum);
1871 if (index >= 0) {
1872 if (scale == 0) {
1873 sum = cpu_regs[index];
1874 } else {
1875 tcg_gen_shli_tl(cpu_A0, cpu_regs[index], scale);
1876 sum = cpu_A0;
1877 }
1878 if (base >= 0) {
1879 tcg_gen_add_tl(cpu_A0, sum, cpu_regs[base]);
1880 sum = cpu_A0;
1881 }
1882 } else if (base >= 0) {
1883 sum = cpu_regs[base];
1884 }
1885 if (TCGV_IS_UNUSED(sum)) {
1886 tcg_gen_movi_tl(cpu_A0, disp);
1887 sum = cpu_A0;
1888 } else if (disp != 0) {
1889 tcg_gen_addi_tl(cpu_A0, sum, disp);
1890 sum = cpu_A0;
1891 }
1892
1893 if (base == R_EBP || base == R_ESP) {
1894 def_seg = R_SS;
1895 }
1896 break;
1897
1898 case MO_16:
1899 sum = cpu_A0;
1900 if (mod == 0) {
1901 if (rm == 6) {
1902 disp = cpu_lduw_code(env, s->pc);
1903 s->pc += 2;
1904 tcg_gen_movi_tl(cpu_A0, disp);
1905 break;
1906 }
1907 disp = 0;
1908 } else if (mod == 1) {
1909 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1910 } else {
1911 disp = (int16_t)cpu_lduw_code(env, s->pc);
1912 s->pc += 2;
1913 }
1914
1915 switch (rm) {
1916 case 0:
1917 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
1918 break;
1919 case 1:
1920 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
1921 break;
1922 case 2:
1923 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
1924 def_seg = R_SS;
1925 break;
1926 case 3:
1927 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
1928 def_seg = R_SS;
1929 break;
1930 case 4:
1931 sum = cpu_regs[R_ESI];
1932 break;
1933 case 5:
1934 sum = cpu_regs[R_EDI];
1935 break;
1936 case 6:
1937 sum = cpu_regs[R_EBP];
1938 def_seg = R_SS;
1939 break;
1940 default:
1941 case 7:
1942 sum = cpu_regs[R_EBX];
1943 break;
1944 }
1945 if (disp != 0) {
1946 tcg_gen_addi_tl(cpu_A0, sum, disp);
1947 sum = cpu_A0;
1948 }
1949 break;
1950
1951 default:
1952 tcg_abort();
1953 }
1954
1955 gen_lea_v_seg(s, s->aflag, sum, def_seg, ovr_seg);
1956 }
1957
1958 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
1959 {
1960 int mod, rm, base, code;
1961
1962 mod = (modrm >> 6) & 3;
1963 if (mod == 3)
1964 return;
1965 rm = modrm & 7;
1966
1967 switch (s->aflag) {
1968 case MO_64:
1969 case MO_32:
1970 base = rm;
1971
1972 if (base == 4) {
1973 code = cpu_ldub_code(env, s->pc++);
1974 base = (code & 7);
1975 }
1976
1977 switch (mod) {
1978 case 0:
1979 if (base == 5) {
1980 s->pc += 4;
1981 }
1982 break;
1983 case 1:
1984 s->pc++;
1985 break;
1986 default:
1987 case 2:
1988 s->pc += 4;
1989 break;
1990 }
1991 break;
1992
1993 case MO_16:
1994 switch (mod) {
1995 case 0:
1996 if (rm == 6) {
1997 s->pc += 2;
1998 }
1999 break;
2000 case 1:
2001 s->pc++;
2002 break;
2003 default:
2004 case 2:
2005 s->pc += 2;
2006 break;
2007 }
2008 break;
2009
2010 default:
2011 tcg_abort();
2012 }
2013 }
2014
2015 /* used for LEA and MOV AX, mem */
2016 static void gen_add_A0_ds_seg(DisasContext *s)
2017 {
2018 gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2019 }
2020
2021 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2022 OR_TMP0 */
2023 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2024 TCGMemOp ot, int reg, int is_store)
2025 {
2026 int mod, rm;
2027
2028 mod = (modrm >> 6) & 3;
2029 rm = (modrm & 7) | REX_B(s);
2030 if (mod == 3) {
2031 if (is_store) {
2032 if (reg != OR_TMP0)
2033 gen_op_mov_v_reg(ot, cpu_T0, reg);
2034 gen_op_mov_reg_v(ot, rm, cpu_T0);
2035 } else {
2036 gen_op_mov_v_reg(ot, cpu_T0, rm);
2037 if (reg != OR_TMP0)
2038 gen_op_mov_reg_v(ot, reg, cpu_T0);
2039 }
2040 } else {
2041 gen_lea_modrm(env, s, modrm);
2042 if (is_store) {
2043 if (reg != OR_TMP0)
2044 gen_op_mov_v_reg(ot, cpu_T0, reg);
2045 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2046 } else {
2047 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2048 if (reg != OR_TMP0)
2049 gen_op_mov_reg_v(ot, reg, cpu_T0);
2050 }
2051 }
2052 }
2053
2054 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2055 {
2056 uint32_t ret;
2057
2058 switch (ot) {
2059 case MO_8:
2060 ret = cpu_ldub_code(env, s->pc);
2061 s->pc++;
2062 break;
2063 case MO_16:
2064 ret = cpu_lduw_code(env, s->pc);
2065 s->pc += 2;
2066 break;
2067 case MO_32:
2068 #ifdef TARGET_X86_64
2069 case MO_64:
2070 #endif
2071 ret = cpu_ldl_code(env, s->pc);
2072 s->pc += 4;
2073 break;
2074 default:
2075 tcg_abort();
2076 }
2077 return ret;
2078 }
2079
2080 static inline int insn_const_size(TCGMemOp ot)
2081 {
2082 if (ot <= MO_32) {
2083 return 1 << ot;
2084 } else {
2085 return 4;
2086 }
2087 }
2088
2089 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2090 {
2091 TranslationBlock *tb;
2092 target_ulong pc;
2093
2094 pc = s->cs_base + eip;
2095 tb = s->tb;
2096 /* NOTE: we handle the case where the TB spans two pages here */
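    /* (Added rationale, as I understand it: direct block chaining is only
       used when the destination lies within a guest page this TB already
       covers, since the chained link stays valid only as long as those
       pages' translations do; a cross-page target takes a full exit.) */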
2097 if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2098 (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
2099 /* jump to same page: we can use a direct jump */
2100 tcg_gen_goto_tb(tb_num);
2101 gen_jmp_im(eip);
2102 tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2103 } else {
2104 /* jump to another page: currently not optimized */
2105 gen_jmp_im(eip);
2106 gen_eob(s);
2107 }
2108 }
2109
2110 static inline void gen_jcc(DisasContext *s, int b,
2111 target_ulong val, target_ulong next_eip)
2112 {
2113 TCGLabel *l1, *l2;
2114
2115 if (s->jmp_opt) {
2116 l1 = gen_new_label();
2117 gen_jcc1(s, b, l1);
2118
2119 gen_goto_tb(s, 0, next_eip);
2120
2121 gen_set_label(l1);
2122 gen_goto_tb(s, 1, val);
2123 s->is_jmp = DISAS_TB_JUMP;
2124 } else {
2125 l1 = gen_new_label();
2126 l2 = gen_new_label();
2127 gen_jcc1(s, b, l1);
2128
2129 gen_jmp_im(next_eip);
2130 tcg_gen_br(l2);
2131
2132 gen_set_label(l1);
2133 gen_jmp_im(val);
2134 gen_set_label(l2);
2135 gen_eob(s);
2136 }
2137 }
2138
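/* CMOVcc: load the r/m operand into T0, then use a conditional move to pick
   either it or the previous register value.  The destination register is
   always written back, so a 32-bit CMOV zero-extends the destination even
   when the condition is false. */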
2139 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2140 int modrm, int reg)
2141 {
2142 CCPrepare cc;
2143
2144 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2145
2146 cc = gen_prepare_cc(s, b, cpu_T1);
2147 if (cc.mask != -1) {
2148 TCGv t0 = tcg_temp_new();
2149 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2150 cc.reg = t0;
2151 }
2152 if (!cc.use_reg2) {
2153 cc.reg2 = tcg_const_tl(cc.imm);
2154 }
2155
2156 tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2157 cpu_T0, cpu_regs[reg]);
2158 gen_op_mov_reg_v(ot, reg, cpu_T0);
2159
2160 if (cc.mask != -1) {
2161 tcg_temp_free(cc.reg);
2162 }
2163 if (!cc.use_reg2) {
2164 tcg_temp_free(cc.reg2);
2165 }
2166 }
2167
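/* Segment register helpers: load a selector into T0, or, for real and vm86
   mode, store T0 as the new selector and derive the segment base as
   selector << 4. */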
2168 static inline void gen_op_movl_T0_seg(int seg_reg)
2169 {
2170 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2171 offsetof(CPUX86State,segs[seg_reg].selector));
2172 }
2173
2174 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2175 {
2176 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2177 tcg_gen_st32_tl(cpu_T0, cpu_env,
2178 offsetof(CPUX86State,segs[seg_reg].selector));
2179 tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2180 }
2181
2182 /* move T0 to seg_reg and compute if the CPU state may change. Never
2183 call this function with seg_reg == R_CS */
2184 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2185 {
2186 if (s->pe && !s->vm86) {
2187 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2188 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2189 /* abort translation because the addseg value may change or
2190 because ss32 may change. For R_SS, translation must always
2191 stop as a special handling must be done to disable hardware
2192 interrupts for the next instruction */
2193 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2194 s->is_jmp = DISAS_TB_JUMP;
2195 } else {
2196 gen_op_movl_seg_T0_vm(seg_reg);
2197 if (seg_reg == R_SS)
2198 s->is_jmp = DISAS_TB_JUMP;
2199 }
2200 }
2201
2202 static inline int svm_is_rep(int prefixes)
2203 {
2204 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2205 }
2206
2207 static inline void
2208 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2209 uint32_t type, uint64_t param)
2210 {
2211 /* no SVM activated; fast case */
2212 if (likely(!(s->flags & HF_SVMI_MASK)))
2213 return;
2214 gen_update_cc_op(s);
2215 gen_jmp_im(pc_start - s->cs_base);
2216 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2217 tcg_const_i64(param));
2218 }
2219
2220 static inline void
2221 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2222 {
2223 gen_svm_check_intercept_param(s, pc_start, type, 0);
2224 }
2225
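/* Add a constant to the stack pointer, using the current stack address size. */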
2226 static inline void gen_stack_update(DisasContext *s, int addend)
2227 {
2228 gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2229 }
2230
2231 /* Generate a push. It depends on ss32, addseg and dflag. */
2232 static void gen_push_v(DisasContext *s, TCGv val)
2233 {
2234 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2235 TCGMemOp a_ot = mo_stacksize(s);
2236 int size = 1 << d_ot;
2237 TCGv new_esp = cpu_A0;
2238
2239 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2240
2241 if (!CODE64(s)) {
2242 if (s->addseg) {
2243 new_esp = cpu_tmp4;
2244 tcg_gen_mov_tl(new_esp, cpu_A0);
2245 }
2246 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2247 }
2248
2249 gen_op_st_v(s, d_ot, val, cpu_A0);
2250 gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2251 }
2252
2253 /* a two-step pop is necessary for precise exceptions */
2254 static TCGMemOp gen_pop_T0(DisasContext *s)
2255 {
2256 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2257
2258 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2259 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2260
2261 return d_ot;
2262 }
2263
2264 static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2265 {
2266 gen_stack_update(s, 1 << ot);
2267 }
2268
2269 static inline void gen_stack_A0(DisasContext *s)
2270 {
2271 gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2272 }
2273
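/* PUSHA/PUSHAD: store EAX..EDI in successive slots below the current stack
   pointer (EAX at the highest address, matching the push order), then move
   ESP down by eight slots. */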
2274 static void gen_pusha(DisasContext *s)
2275 {
2276 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2277 TCGMemOp d_ot = s->dflag;
2278 int size = 1 << d_ot;
2279 int i;
2280
2281 for (i = 0; i < 8; i++) {
2282 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2283 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2284 gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2285 }
2286
2287 gen_stack_update(s, -8 * size);
2288 }
2289
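/* POPA/POPAD: reload EDI..EAX from the stack; the slot holding the saved ESP
   image is skipped rather than loaded. */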
2290 static void gen_popa(DisasContext *s)
2291 {
2292 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2293 TCGMemOp d_ot = s->dflag;
2294 int size = 1 << d_ot;
2295 int i;
2296
2297 for (i = 0; i < 8; i++) {
2298 /* ESP is not reloaded */
2299 if (7 - i == R_ESP) {
2300 continue;
2301 }
2302 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2303 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2304 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2305 gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2306 }
2307
2308 gen_stack_update(s, 8 * size);
2309 }
2310
2311 static void gen_enter(DisasContext *s, int esp_addend, int level)
2312 {
2313 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2314 TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2315 int size = 1 << d_ot;
2316
2317 /* Push BP; compute FrameTemp into T1. */
2318 tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2319 gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2320 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2321
2322 level &= 31;
2323 if (level != 0) {
2324 int i;
2325
2326 /* Copy level-1 pointers from the previous frame. */
2327 for (i = 1; i < level; ++i) {
2328 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2329 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2330 gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2331
2332 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2333 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2334 gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2335 }
2336
2337 /* Push the current FrameTemp as the last level. */
2338 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2339 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2340 gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2341 }
2342
2343 /* Copy the FrameTemp value to EBP. */
2344 gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2345
2346 /* Compute the final value of ESP. */
2347 tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2348 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2349 }
2350
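/* LEAVE: reload the saved frame pointer from [EBP] and point ESP just past
   the restored slot. */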
2351 static void gen_leave(DisasContext *s)
2352 {
2353 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2354 TCGMemOp a_ot = mo_stacksize(s);
2355
2356 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2357 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2358
2359 tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2360
2361 gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2362 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2363 }
2364
2365 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2366 {
2367 gen_update_cc_op(s);
2368 gen_jmp_im(cur_eip);
2369 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2370 s->is_jmp = DISAS_TB_JUMP;
2371 }
2372
2373 /* an interrupt is different from an exception because of the
2374 privilege checks */
2375 static void gen_interrupt(DisasContext *s, int intno,
2376 target_ulong cur_eip, target_ulong next_eip)
2377 {
2378 gen_update_cc_op(s);
2379 gen_jmp_im(cur_eip);
2380 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2381 tcg_const_i32(next_eip - cur_eip));
2382 s->is_jmp = DISAS_TB_JUMP;
2383 }
2384
2385 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2386 {
2387 gen_update_cc_op(s);
2388 gen_jmp_im(cur_eip);
2389 gen_helper_debug(cpu_env);
2390 s->is_jmp = DISAS_TB_JUMP;
2391 }
2392
2393 /* generate a generic end of block. A trace exception is also generated
2394 if needed */
2395 static void gen_eob(DisasContext *s)
2396 {
2397 gen_update_cc_op(s);
2398 if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
2399 gen_helper_reset_inhibit_irq(cpu_env);
2400 }
2401 if (s->tb->flags & HF_RF_MASK) {
2402 gen_helper_reset_rf(cpu_env);
2403 }
2404 if (s->singlestep_enabled) {
2405 gen_helper_debug(cpu_env);
2406 } else if (s->tf) {
2407 gen_helper_single_step(cpu_env);
2408 } else {
2409 tcg_gen_exit_tb(0);
2410 }
2411 s->is_jmp = DISAS_TB_JUMP;
2412 }
2413
2414 /* generate a jump to eip. No segment change must happen beforehand, as a
2415    direct jump to the next block may occur */
2416 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2417 {
2418 gen_update_cc_op(s);
2419 set_cc_op(s, CC_OP_DYNAMIC);
2420 if (s->jmp_opt) {
2421 gen_goto_tb(s, tb_num, eip);
2422 s->is_jmp = DISAS_TB_JUMP;
2423 } else {
2424 gen_jmp_im(eip);
2425 gen_eob(s);
2426 }
2427 }
2428
2429 static void gen_jmp(DisasContext *s, target_ulong eip)
2430 {
2431 gen_jmp_tb(s, eip, 0);
2432 }
2433
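/* Move 64-bit ("q") and 128-bit ("o") values between guest memory at A0 and
   a field in the CPU state (an MMX or XMM register), using little-endian
   64-bit accesses. */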
2434 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2435 {
2436 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2437 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2438 }
2439
2440 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2441 {
2442 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2443 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2444 }
2445
2446 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2447 {
2448 int mem_index = s->mem_index;
2449 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2450 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2451 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2452 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2453 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2454 }
2455
2456 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2457 {
2458 int mem_index = s->mem_index;
2459 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2460 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2461 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2462 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2463 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2464 }
2465
2466 static inline void gen_op_movo(int d_offset, int s_offset)
2467 {
2468 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2469 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2470 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2471 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2472 }
2473
2474 static inline void gen_op_movq(int d_offset, int s_offset)
2475 {
2476 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2477 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2478 }
2479
2480 static inline void gen_op_movl(int d_offset, int s_offset)
2481 {
2482 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2483 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2484 }
2485
2486 static inline void gen_op_movq_env_0(int d_offset)
2487 {
2488 tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2489 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2490 }
2491
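/* Helper pointer types for the SSE tables below.  The name encodes the
   signature: the letter after "SSEFunc" is the return type ('0' for void,
   'i' for 32-bit, 'l' for 64-bit), the remaining letters are the arguments
   ('e' = env pointer, 'p' = register pointer, 'i'/'l' = 32/64-bit value,
   't' = target-width value). */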
2492 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2493 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2494 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2495 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2496 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2497 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2498 TCGv_i32 val);
2499 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2500 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2501 TCGv val);
2502
2503 #define SSE_SPECIAL ((void *)1)
2504 #define SSE_DUMMY ((void *)2)
2505
2506 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2507 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2508 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2509
2510 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2511 /* 3DNow! extensions */
2512 [0x0e] = { SSE_DUMMY }, /* femms */
2513 [0x0f] = { SSE_DUMMY }, /* pf... */
2514 /* pure SSE operations */
2515 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2516 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2517 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2518 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2519 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2520 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2521 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2522 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2523
2524 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2525 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2526 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2527 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2528 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2529 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2530 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2531 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2532 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2533 [0x51] = SSE_FOP(sqrt),
2534 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2535 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2536 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2537 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2538 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2539 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2540 [0x58] = SSE_FOP(add),
2541 [0x59] = SSE_FOP(mul),
2542 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2543 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2544 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2545 [0x5c] = SSE_FOP(sub),
2546 [0x5d] = SSE_FOP(min),
2547 [0x5e] = SSE_FOP(div),
2548 [0x5f] = SSE_FOP(max),
2549
2550 [0xc2] = SSE_FOP(cmpeq),
2551 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2552 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2553
2554 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2555 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2556 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2557
2558 /* MMX ops and their SSE extensions */
2559 [0x60] = MMX_OP2(punpcklbw),
2560 [0x61] = MMX_OP2(punpcklwd),
2561 [0x62] = MMX_OP2(punpckldq),
2562 [0x63] = MMX_OP2(packsswb),
2563 [0x64] = MMX_OP2(pcmpgtb),
2564 [0x65] = MMX_OP2(pcmpgtw),
2565 [0x66] = MMX_OP2(pcmpgtl),
2566 [0x67] = MMX_OP2(packuswb),
2567 [0x68] = MMX_OP2(punpckhbw),
2568 [0x69] = MMX_OP2(punpckhwd),
2569 [0x6a] = MMX_OP2(punpckhdq),
2570 [0x6b] = MMX_OP2(packssdw),
2571 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2572 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2573 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2574 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2575 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2576 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2577 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2578 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2579 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2580 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2581 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2582 [0x74] = MMX_OP2(pcmpeqb),
2583 [0x75] = MMX_OP2(pcmpeqw),
2584 [0x76] = MMX_OP2(pcmpeql),
2585 [0x77] = { SSE_DUMMY }, /* emms */
2586 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2587 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2588 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2589 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2590 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2591 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2592 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2593 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2594 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2595 [0xd1] = MMX_OP2(psrlw),
2596 [0xd2] = MMX_OP2(psrld),
2597 [0xd3] = MMX_OP2(psrlq),
2598 [0xd4] = MMX_OP2(paddq),
2599 [0xd5] = MMX_OP2(pmullw),
2600 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2601 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2602 [0xd8] = MMX_OP2(psubusb),
2603 [0xd9] = MMX_OP2(psubusw),
2604 [0xda] = MMX_OP2(pminub),
2605 [0xdb] = MMX_OP2(pand),
2606 [0xdc] = MMX_OP2(paddusb),
2607 [0xdd] = MMX_OP2(paddusw),
2608 [0xde] = MMX_OP2(pmaxub),
2609 [0xdf] = MMX_OP2(pandn),
2610 [0xe0] = MMX_OP2(pavgb),
2611 [0xe1] = MMX_OP2(psraw),
2612 [0xe2] = MMX_OP2(psrad),
2613 [0xe3] = MMX_OP2(pavgw),
2614 [0xe4] = MMX_OP2(pmulhuw),
2615 [0xe5] = MMX_OP2(pmulhw),
2616 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2617 [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
2618 [0xe8] = MMX_OP2(psubsb),
2619 [0xe9] = MMX_OP2(psubsw),
2620 [0xea] = MMX_OP2(pminsw),
2621 [0xeb] = MMX_OP2(por),
2622 [0xec] = MMX_OP2(paddsb),
2623 [0xed] = MMX_OP2(paddsw),
2624 [0xee] = MMX_OP2(pmaxsw),
2625 [0xef] = MMX_OP2(pxor),
2626 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2627 [0xf1] = MMX_OP2(psllw),
2628 [0xf2] = MMX_OP2(pslld),
2629 [0xf3] = MMX_OP2(psllq),
2630 [0xf4] = MMX_OP2(pmuludq),
2631 [0xf5] = MMX_OP2(pmaddwd),
2632 [0xf6] = MMX_OP2(psadbw),
2633 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2634 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2635 [0xf8] = MMX_OP2(psubb),
2636 [0xf9] = MMX_OP2(psubw),
2637 [0xfa] = MMX_OP2(psubl),
2638 [0xfb] = MMX_OP2(psubq),
2639 [0xfc] = MMX_OP2(paddb),
2640 [0xfd] = MMX_OP2(paddw),
2641 [0xfe] = MMX_OP2(paddl),
2642 };
2643
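/* Shift-by-immediate group (0F 71/72/73): three rows of eight entries, one
   row per element width (w/d/q), with the ModRM reg field selecting the
   operation within a row.  For example, 66 0F 73 /3 (psrldq) lands on
   entry [16 + 3][1]. */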
2644 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2645 [0 + 2] = MMX_OP2(psrlw),
2646 [0 + 4] = MMX_OP2(psraw),
2647 [0 + 6] = MMX_OP2(psllw),
2648 [8 + 2] = MMX_OP2(psrld),
2649 [8 + 4] = MMX_OP2(psrad),
2650 [8 + 6] = MMX_OP2(pslld),
2651 [16 + 2] = MMX_OP2(psrlq),
2652 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2653 [16 + 6] = MMX_OP2(psllq),
2654 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2655 };
2656
2657 static const SSEFunc_0_epi sse_op_table3ai[] = {
2658 gen_helper_cvtsi2ss,
2659 gen_helper_cvtsi2sd
2660 };
2661
2662 #ifdef TARGET_X86_64
2663 static const SSEFunc_0_epl sse_op_table3aq[] = {
2664 gen_helper_cvtsq2ss,
2665 gen_helper_cvtsq2sd
2666 };
2667 #endif
2668
2669 static const SSEFunc_i_ep sse_op_table3bi[] = {
2670 gen_helper_cvttss2si,
2671 gen_helper_cvtss2si,
2672 gen_helper_cvttsd2si,
2673 gen_helper_cvtsd2si
2674 };
2675
2676 #ifdef TARGET_X86_64
2677 static const SSEFunc_l_ep sse_op_table3bq[] = {
2678 gen_helper_cvttss2sq,
2679 gen_helper_cvtss2sq,
2680 gen_helper_cvttsd2sq,
2681 gen_helper_cvtsd2sq
2682 };
2683 #endif
2684
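/* CMPPS/CMPPD/CMPSS/CMPSD: the immediate byte selects one of these eight
   comparison predicates. */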
2685 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2686 SSE_FOP(cmpeq),
2687 SSE_FOP(cmplt),
2688 SSE_FOP(cmple),
2689 SSE_FOP(cmpunord),
2690 SSE_FOP(cmpneq),
2691 SSE_FOP(cmpnlt),
2692 SSE_FOP(cmpnle),
2693 SSE_FOP(cmpord),
2694 };
2695
2696 static const SSEFunc_0_epp sse_op_table5[256] = {
2697 [0x0c] = gen_helper_pi2fw,
2698 [0x0d] = gen_helper_pi2fd,
2699 [0x1c] = gen_helper_pf2iw,
2700 [0x1d] = gen_helper_pf2id,
2701 [0x8a] = gen_helper_pfnacc,
2702 [0x8e] = gen_helper_pfpnacc,
2703 [0x90] = gen_helper_pfcmpge,
2704 [0x94] = gen_helper_pfmin,
2705 [0x96] = gen_helper_pfrcp,
2706 [0x97] = gen_helper_pfrsqrt,
2707 [0x9a] = gen_helper_pfsub,
2708 [0x9e] = gen_helper_pfadd,
2709 [0xa0] = gen_helper_pfcmpgt,
2710 [0xa4] = gen_helper_pfmax,
2711 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2712 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2713 [0xaa] = gen_helper_pfsubr,
2714 [0xae] = gen_helper_pfacc,
2715 [0xb0] = gen_helper_pfcmpeq,
2716 [0xb4] = gen_helper_pfmul,
2717 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2718 [0xb7] = gen_helper_pmulhrw_mmx,
2719 [0xbb] = gen_helper_pswapd,
2720 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2721 };
2722
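/* Entries for the three-byte opcode tables: op[0] is the MMX form (NULL when
   there is none), op[1] the XMM form, and ext_mask the CPUID feature bit
   that gates the instruction. */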
2723 struct SSEOpHelper_epp {
2724 SSEFunc_0_epp op[2];
2725 uint32_t ext_mask;
2726 };
2727
2728 struct SSEOpHelper_eppi {
2729 SSEFunc_0_eppi op[2];
2730 uint32_t ext_mask;
2731 };
2732
2733 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2734 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2735 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2736 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2737 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2738 CPUID_EXT_PCLMULQDQ }
2739 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2740
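/* Three-byte 0F 38 opcode map (SSSE3, SSE4.1, SSE4.2 and AES-NI). */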
2741 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2742 [0x00] = SSSE3_OP(pshufb),
2743 [0x01] = SSSE3_OP(phaddw),
2744 [0x02] = SSSE3_OP(phaddd),
2745 [0x03] = SSSE3_OP(phaddsw),
2746 [0x04] = SSSE3_OP(pmaddubsw),
2747 [0x05] = SSSE3_OP(phsubw),
2748 [0x06] = SSSE3_OP(phsubd),
2749 [0x07] = SSSE3_OP(phsubsw),
2750 [0x08] = SSSE3_OP(psignb),
2751 [0x09] = SSSE3_OP(psignw),
2752 [0x0a] = SSSE3_OP(psignd),
2753 [0x0b] = SSSE3_OP(pmulhrsw),
2754 [0x10] = SSE41_OP(pblendvb),
2755 [0x14] = SSE41_OP(blendvps),
2756 [0x15] = SSE41_OP(blendvpd),
2757 [0x17] = SSE41_OP(ptest),
2758 [0x1c] = SSSE3_OP(pabsb),
2759 [0x1d] = SSSE3_OP(pabsw),
2760 [0x1e] = SSSE3_OP(pabsd),
2761 [0x20] = SSE41_OP(pmovsxbw),
2762 [0x21] = SSE41_OP(pmovsxbd),
2763 [0x22] = SSE41_OP(pmovsxbq),
2764 [0x23] = SSE41_OP(pmovsxwd),
2765 [0x24] = SSE41_OP(pmovsxwq),
2766 [0x25] = SSE41_OP(pmovsxdq),
2767 [0x28] = SSE41_OP(pmuldq),
2768 [0x29] = SSE41_OP(pcmpeqq),
2769 [0x2a] = SSE41_SPECIAL, /* movntdqa */
2770 [0x2b] = SSE41_OP(packusdw),
2771 [0x30] = SSE41_OP(pmovzxbw),
2772 [0x31] = SSE41_OP(pmovzxbd),
2773 [0x32] = SSE41_OP(pmovzxbq),
2774 [0x33] = SSE41_OP(pmovzxwd),
2775 [0x34] = SSE41_OP(pmovzxwq),
2776 [0x35] = SSE41_OP(pmovzxdq),
2777 [0x37] = SSE42_OP(pcmpgtq),
2778 [0x38] = SSE41_OP(pminsb),
2779 [0x39] = SSE41_OP(pminsd),
2780 [0x3a] = SSE41_OP(pminuw),
2781 [0x3b] = SSE41_OP(pminud),
2782 [0x3c] = SSE41_OP(pmaxsb),
2783 [0x3d] = SSE41_OP(pmaxsd),
2784 [0x3e] = SSE41_OP(pmaxuw),
2785 [0x3f] = SSE41_OP(pmaxud),
2786 [0x40] = SSE41_OP(pmulld),
2787 [0x41] = SSE41_OP(phminposuw),
2788 [0xdb] = AESNI_OP(aesimc),
2789 [0xdc] = AESNI_OP(aesenc),
2790 [0xdd] = AESNI_OP(aesenclast),
2791 [0xde] = AESNI_OP(aesdec),
2792 [0xdf] = AESNI_OP(aesdeclast),
2793 };
2794
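/* Three-byte 0F 3A opcode map; every entry takes an extra immediate byte. */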
2795 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2796 [0x08] = SSE41_OP(roundps),
2797 [0x09] = SSE41_OP(roundpd),
2798 [0x0a] = SSE41_OP(roundss),
2799 [0x0b] = SSE41_OP(roundsd),
2800 [0x0c] = SSE41_OP(blendps),
2801 [0x0d] = SSE41_OP(blendpd),
2802 [0x0e] = SSE41_OP(pblendw),
2803 [0x0f] = SSSE3_OP(palignr),
2804 [0x14] = SSE41_SPECIAL, /* pextrb */
2805 [0x15] = SSE41_SPECIAL, /* pextrw */
2806 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2807 [0x17] = SSE41_SPECIAL, /* extractps */
2808 [0x20] = SSE41_SPECIAL, /* pinsrb */
2809 [0x21] = SSE41_SPECIAL, /* insertps */
2810 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2811 [0x40] = SSE41_OP(dpps),
2812 [0x41] = SSE41_OP(dppd),
2813 [0x42] = SSE41_OP(mpsadbw),
2814 [0x44] = PCLMULQDQ_OP(pclmulqdq),
2815 [0x60] = SSE42_OP(pcmpestrm),
2816 [0x61] = SSE42_OP(pcmpestri),
2817 [0x62] = SSE42_OP(pcmpistrm),
2818 [0x63] = SSE42_OP(pcmpistri),
2819 [0xdf] = AESNI_OP(aeskeygenassist),
2820 };
2821
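/* Translate one MMX/SSE/3DNow! instruction.  'b' carries the opcode byte
   that followed 0F; the mandatory prefix (none/66/F3/F2) selects the column
   in the tables above, and rex_r extends the ModRM reg field when an XMM
   register is addressed. */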
2822 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2823 target_ulong pc_start, int rex_r)
2824 {
2825 int b1, op1_offset, op2_offset, is_xmm, val;
2826 int modrm, mod, rm, reg;
2827 SSEFunc_0_epp sse_fn_epp;
2828 SSEFunc_0_eppi sse_fn_eppi;
2829 SSEFunc_0_ppi sse_fn_ppi;
2830 SSEFunc_0_eppt sse_fn_eppt;
2831 TCGMemOp ot;
2832
2833 b &= 0xff;
2834 if (s->prefix & PREFIX_DATA)
2835 b1 = 1;
2836 else if (s->prefix & PREFIX_REPZ)
2837 b1 = 2;
2838 else if (s->prefix & PREFIX_REPNZ)
2839 b1 = 3;
2840 else
2841 b1 = 0;
2842 sse_fn_epp = sse_op_table1[b][b1];
2843 if (!sse_fn_epp) {
2844 goto illegal_op;
2845 }
2846 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2847 is_xmm = 1;
2848 } else {
2849 if (b1 == 0) {
2850 /* MMX case */
2851 is_xmm = 0;
2852 } else {
2853 is_xmm = 1;
2854 }
2855 }
2856 /* simple MMX/SSE operation */
2857 if (s->flags & HF_TS_MASK) {
2858 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2859 return;
2860 }
2861 if (s->flags & HF_EM_MASK) {
2862 illegal_op:
2863 gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
2864 return;
2865 }
2866 if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
2867 if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
2868 goto illegal_op;
2869 if (b == 0x0e) {
2870 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
2871 goto illegal_op;
2872 /* femms */
2873 gen_helper_emms(cpu_env);
2874 return;
2875 }
2876 if (b == 0x77) {
2877 /* emms */
2878 gen_helper_emms(cpu_env);
2879 return;
2880 }
2881 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2882 the static cpu state) */
2883 if (!is_xmm) {
2884 gen_helper_enter_mmx(cpu_env);
2885 }
2886
2887 modrm = cpu_ldub_code(env, s->pc++);
2888 reg = ((modrm >> 3) & 7);
2889 if (is_xmm)
2890 reg |= rex_r;
2891 mod = (modrm >> 6) & 3;
2892 if (sse_fn_epp == SSE_SPECIAL) {
2893 b |= (b1 << 8);
2894 switch(b) {
2895 case 0x0e7: /* movntq */
2896 if (mod == 3)
2897 goto illegal_op;
2898 gen_lea_modrm(env, s, modrm);
2899 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2900 break;
2901 case 0x1e7: /* movntdq */
2902 case 0x02b: /* movntps */
2903 case 0x12b: /* movntpd */
2904 if (mod == 3)
2905 goto illegal_op;
2906 gen_lea_modrm(env, s, modrm);
2907 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2908 break;
2909 case 0x3f0: /* lddqu */
2910 if (mod == 3)
2911 goto illegal_op;
2912 gen_lea_modrm(env, s, modrm);
2913 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2914 break;
2915 case 0x22b: /* movntss */
2916 case 0x32b: /* movntsd */
2917 if (mod == 3)
2918 goto illegal_op;
2919 gen_lea_modrm(env, s, modrm);
2920 if (b1 & 1) {
2921 gen_stq_env_A0(s, offsetof(CPUX86State,
2922 xmm_regs[reg].ZMM_Q(0)));
2923 } else {
2924 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
2925 xmm_regs[reg].ZMM_L(0)));
2926 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
2927 }
2928 break;
2929 case 0x6e: /* movd mm, ea */
2930 #ifdef TARGET_X86_64
2931 if (s->dflag == MO_64) {
2932 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
2933 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
2934 } else
2935 #endif
2936 {
2937 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
2938 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
2939 offsetof(CPUX86State,fpregs[reg].mmx));
2940 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2941 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
2942 }
2943 break;
2944 case 0x16e: /* movd xmm, ea */
2945 #ifdef TARGET_X86_64
2946 if (s->dflag == MO_64) {
2947 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
2948 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
2949 offsetof(CPUX86State,xmm_regs[reg]));
2950 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
2951 } else
2952 #endif
2953 {
2954 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
2955 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
2956 offsetof(CPUX86State,xmm_regs[reg]));
2957 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2958 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
2959 }
2960 break;
2961 case 0x6f: /* movq mm, ea */
2962 if (mod != 3) {
2963 gen_lea_modrm(env, s, modrm);
2964 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2965 } else {
2966 rm = (modrm & 7);
2967 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
2968 offsetof(CPUX86State,fpregs[rm].mmx));
2969 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
2970 offsetof(CPUX86State,fpregs[reg].mmx));
2971 }
2972 break;
2973 case 0x010: /* movups */
2974 case 0x110: /* movupd */
2975 case 0x028: /* movaps */
2976 case 0x128: /* movapd */
2977 case 0x16f: /* movdqa xmm, ea */
2978 case 0x26f: /* movdqu xmm, ea */
2979 if (mod != 3) {
2980 gen_lea_modrm(env, s, modrm);
2981 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2982 } else {
2983 rm = (modrm & 7) | REX_B(s);
2984 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
2985 offsetof(CPUX86State,xmm_regs[rm]));
2986 }
2987 break;
2988 case 0x210: /* movss xmm, ea */
2989 if (mod != 3) {
2990 gen_lea_modrm(env, s, modrm);
2991 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
2992 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
2993 tcg_gen_movi_tl(cpu_T0, 0);
2994 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
2995 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
2996 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
2997 } else {
2998 rm = (modrm & 7) | REX_B(s);
2999 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3000 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3001 }
3002 break;
3003 case 0x310: /* movsd xmm, ea */
3004 if (mod != 3) {
3005 gen_lea_modrm(env, s, modrm);
3006 gen_ldq_env_A0(s, offsetof(CPUX86State,
3007 xmm_regs[reg].ZMM_Q(0)));
3008 tcg_gen_movi_tl(cpu_T0, 0);
3009 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3010 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3011 } else {
3012 rm = (modrm & 7) | REX_B(s);
3013 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3014 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3015 }
3016 break;
3017 case 0x012: /* movlps */
3018 case 0x112: /* movlpd */
3019 if (mod != 3) {
3020 gen_lea_modrm(env, s, modrm);
3021 gen_ldq_env_A0(s, offsetof(CPUX86State,
3022 xmm_regs[reg].ZMM_Q(0)));
3023 } else {
3024 /* movhlps */
3025 rm = (modrm & 7) | REX_B(s);
3026 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3027 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3028 }
3029 break;
3030 case 0x212: /* movsldup */
3031 if (mod != 3) {
3032 gen_lea_modrm(env, s, modrm);
3033 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3034 } else {
3035 rm = (modrm & 7) | REX_B(s);
3036 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3037 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3038 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3039 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3040 }
3041 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3042 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3043 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3044 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3045 break;
3046 case 0x312: /* movddup */
3047 if (mod != 3) {
3048 gen_lea_modrm(env, s, modrm);
3049 gen_ldq_env_A0(s, offsetof(CPUX86State,
3050 xmm_regs[reg].ZMM_Q(0)));
3051 } else {
3052 rm = (modrm & 7) | REX_B(s);
3053 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3054 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3055 }
3056 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3057 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3058 break;
3059 case 0x016: /* movhps */
3060 case 0x116: /* movhpd */
3061 if (mod != 3) {
3062 gen_lea_modrm(env, s, modrm);
3063 gen_ldq_env_A0(s, offsetof(CPUX86State,
3064 xmm_regs[reg].ZMM_Q(1)));
3065 } else {
3066 /* movlhps */
3067 rm = (modrm & 7) | REX_B(s);
3068 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3069 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3070 }
3071 break;
3072 case 0x216: /* movshdup */
3073 if (mod != 3) {
3074 gen_lea_modrm(env, s, modrm);
3075 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3076 } else {
3077 rm = (modrm & 7) | REX_B(s);
3078 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3079 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3080 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3081 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3082 }
3083 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3084 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3085 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3086 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3087 break;
3088 case 0x178:
3089 case 0x378:
3090 {
3091 int bit_index, field_length;
3092
3093 if (b1 == 1 && reg != 0)
3094 goto illegal_op;
3095 field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3096 bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3097 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3098 offsetof(CPUX86State,xmm_regs[reg]));
3099 if (b1 == 1)
3100 gen_helper_extrq_i(cpu_env, cpu_ptr0,
3101 tcg_const_i32(bit_index),
3102 tcg_const_i32(field_length));
3103 else
3104 gen_helper_insertq_i(cpu_env, cpu_ptr0,
3105 tcg_const_i32(bit_index),
3106 tcg_const_i32(field_length));
3107 }
3108 break;
3109 case 0x7e: /* movd ea, mm */
3110 #ifdef TARGET_X86_64
3111 if (s->dflag == MO_64) {
3112 tcg_gen_ld_i64(cpu_T0, cpu_env,
3113 offsetof(CPUX86State,fpregs[reg].mmx));
3114 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3115 } else
3116 #endif
3117 {
3118 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3119 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3120 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3121 }
3122 break;
3123 case 0x17e: /* movd ea, xmm */
3124 #ifdef TARGET_X86_64
3125 if (s->dflag == MO_64) {
3126 tcg_gen_ld_i64(cpu_T0, cpu_env,
3127 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3128 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3129 } else
3130 #endif
3131 {
3132 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3133 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3134 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3135 }
3136 break;
3137 case 0x27e: /* movq xmm, ea */
3138 if (mod != 3) {
3139 gen_lea_modrm(env, s, modrm);
3140 gen_ldq_env_A0(s, offsetof(CPUX86State,
3141 xmm_regs[reg].ZMM_Q(0)));
3142 } else {
3143 rm = (modrm & 7) | REX_B(s);
3144 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3145 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3146 }
3147 gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3148 break;
3149 case 0x7f: /* movq ea, mm */
3150 if (mod != 3) {
3151 gen_lea_modrm(env, s, modrm);
3152 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3153 } else {
3154 rm = (modrm & 7);
3155 gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),