cpu: Move breakpoints field from CPU_COMMON to CPUState
[qemu.git] / target-i386 / translate.c
1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include <stdarg.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <inttypes.h>
24 #include <signal.h>
25
26 #include "qemu/host-utils.h"
27 #include "cpu.h"
28 #include "disas/disas.h"
29 #include "tcg-op.h"
30
31 #include "helper.h"
32 #define GEN_HELPER 1
33 #include "helper.h"
34
/* Instruction prefix flags.  Each flag occupies its own bit so that
   several of them can be OR-ed together into one int. */
#define PREFIX_REPZ   (1 << 0)
#define PREFIX_REPNZ  (1 << 1)
#define PREFIX_LOCK   (1 << 2)
#define PREFIX_DATA   (1 << 3)
#define PREFIX_ADR    (1 << 4)
#define PREFIX_VEX    (1 << 5)
41
/* Accessors for the decoder fields that only exist when TARGET_X86_64
   is defined.  On other builds they fold to the constant 0. */
#if !defined(TARGET_X86_64)
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#else
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#endif
51
/* Count-trailing-zeros / count-leading-zeros helpers: the 64-bit
   variants when TARGET_X86_64 is defined, the 32-bit ones otherwise. */
#if !defined(TARGET_X86_64)
# define ctztl  ctz32
# define clztl  clz32
#else
# define ctztl  ctz64
# define clztl  clz64
#endif
59
60 //#define MACRO_TEST 1
61
62 /* global register indexes */
63 static TCGv_ptr cpu_env;
64 static TCGv cpu_A0;
65 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
66 static TCGv_i32 cpu_cc_op;
67 static TCGv cpu_regs[CPU_NB_REGS];
68 /* local temps */
69 static TCGv cpu_T[2];
70 /* local register indexes (only used inside old micro ops) */
71 static TCGv cpu_tmp0, cpu_tmp4;
72 static TCGv_ptr cpu_ptr0, cpu_ptr1;
73 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
74 static TCGv_i64 cpu_tmp1_i64;
75
76 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
77
78 #include "exec/gen-icount.h"
79
#if defined(TARGET_X86_64)
/* When non-zero, byte_reg_is_xH() never reports the AH/CH/DH/BH
   special case for byte register numbers 4..7. */
static int x86_64_hregs;
#endif
83
84 typedef struct DisasContext {
85 /* current insn context */
86 int override; /* -1 if no override */
87 int prefix;
88 TCGMemOp aflag;
89 TCGMemOp dflag;
90 target_ulong pc; /* pc = eip + cs_base */
91 int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
92 static state change (stop translation) */
93 /* current block context */
94 target_ulong cs_base; /* base of CS segment */
95 int pe; /* protected mode */
96 int code32; /* 32 bit code segment */
97 #ifdef TARGET_X86_64
98 int lma; /* long mode active */
99 int code64; /* 64 bit code segment */
100 int rex_x, rex_b;
101 #endif
102 int vex_l; /* vex vector length */
103 int vex_v; /* vex vvvv register, without 1's compliment. */
104 int ss32; /* 32 bit stack segment */
105 CCOp cc_op; /* current CC operation */
106 bool cc_op_dirty;
107 int addseg; /* non zero if either DS/ES/SS have a non zero base */
108 int f_st; /* currently unused */
109 int vm86; /* vm86 mode */
110 int cpl;
111 int iopl;
112 int tf; /* TF cpu flag */
113 int singlestep_enabled; /* "hardware" single step enabled */
114 int jmp_opt; /* use direct block chaining for direct jumps */
115 int mem_index; /* select memory access functions */
116 uint64_t flags; /* all execution flags */
117 struct TranslationBlock *tb;
118 int popl_esp_hack; /* for correct popl with esp base handling */
119 int rip_offset; /* only used in x86_64, but left for simplicity */
120 int cpuid_features;
121 int cpuid_ext_features;
122 int cpuid_ext2_features;
123 int cpuid_ext3_features;
124 int cpuid_7_0_ebx_features;
125 } DisasContext;
126
127 static void gen_eob(DisasContext *s);
128 static void gen_jmp(DisasContext *s, target_ulong eip);
129 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
130 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
131
/* i386 arith/logic operations, numbered consecutively from 0 */
enum {
    OP_ADDL = 0,
    OP_ORL  = 1,
    OP_ADCL = 2,
    OP_SBBL = 3,
    OP_ANDL = 4,
    OP_SUBL = 5,
    OP_XORL = 6,
    OP_CMPL = 7,
};
143
/* i386 shift ops, numbered consecutively from 0 */
enum {
    OP_ROL  = 0,
    OP_ROR  = 1,
    OP_RCL  = 2,
    OP_RCR  = 3,
    OP_SHL  = 4,
    OP_SHR  = 5,
    OP_SHL1 = 6, /* undocumented */
    OP_SAR  = 7,
};
155
/* Condition selectors; gen_prepare_cc() extracts one of these from
   bits 1..3 of the jump opcode value. */
enum {
    JCC_O  = 0,
    JCC_B  = 1,
    JCC_Z  = 2,
    JCC_BE = 3,
    JCC_S  = 4,
    JCC_P  = 5,
    JCC_L  = 6,
    JCC_LE = 7,
};
166
/* Operand "register" selectors. */
enum {
    /* I386 int registers */
    OR_EAX = 0,   /* MUST be even numbered */
    OR_ECX = 1,
    OR_EDX = 2,
    OR_EBX = 3,
    OR_ESP = 4,
    OR_EBP = 5,
    OR_ESI = 6,
    OR_EDI = 7,

    /* pseudo registers */
    OR_TMP0 = 16, /* temporary operand register */
    OR_TMP1 = 17,
    OR_A0   = 18, /* temporary register used when doing address evaluation */
};
182
/* One bit per condition-code global, used to build the liveness masks
   stored in cc_op_live[]. */
enum {
    USES_CC_DST  = 1 << 0,
    USES_CC_SRC  = 1 << 1,
    USES_CC_SRC2 = 1 << 2,
    USES_CC_SRCT = 1 << 3,
};
189
190 /* Bit set if the global variable is live after setting CC_OP to X. */
191 static const uint8_t cc_op_live[CC_OP_NB] = {
192 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
193 [CC_OP_EFLAGS] = USES_CC_SRC,
194 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
195 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
196 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
197 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
198 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
199 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
200 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
201 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
202 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
203 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
204 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
205 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
206 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
207 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
208 [CC_OP_CLR] = 0,
209 };
210
211 static void set_cc_op(DisasContext *s, CCOp op)
212 {
213 int dead;
214
215 if (s->cc_op == op) {
216 return;
217 }
218
219 /* Discard CC computation that will no longer be used. */
220 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
221 if (dead & USES_CC_DST) {
222 tcg_gen_discard_tl(cpu_cc_dst);
223 }
224 if (dead & USES_CC_SRC) {
225 tcg_gen_discard_tl(cpu_cc_src);
226 }
227 if (dead & USES_CC_SRC2) {
228 tcg_gen_discard_tl(cpu_cc_src2);
229 }
230 if (dead & USES_CC_SRCT) {
231 tcg_gen_discard_tl(cpu_cc_srcT);
232 }
233
234 if (op == CC_OP_DYNAMIC) {
235 /* The DYNAMIC setting is translator only, and should never be
236 stored. Thus we always consider it clean. */
237 s->cc_op_dirty = false;
238 } else {
239 /* Discard any computed CC_OP value (see shifts). */
240 if (s->cc_op == CC_OP_DYNAMIC) {
241 tcg_gen_discard_i32(cpu_cc_op);
242 }
243 s->cc_op_dirty = true;
244 }
245 s->cc_op = op;
246 }
247
248 static void gen_update_cc_op(DisasContext *s)
249 {
250 if (s->cc_op_dirty) {
251 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
252 s->cc_op_dirty = false;
253 }
254 }
255
/* 4 when TARGET_X86_64 is defined, 3 otherwise. */
#if !defined(TARGET_X86_64)
#define NB_OP_SIZES 3
#else
#define NB_OP_SIZES 4
#endif
265
/* Byte offsets used to address a sub-part of a target_ulong sized
   register slot; the values depend on host endianness. */
#if !defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#else
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#endif
279
/* A byte register number in an instruction encoding normally means
 * "low 8 bits of register N".  In some cases, however, N in 4..7
 * selects [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".
 *
 * Returns true for that special case and false otherwise.  When
 * TARGET_X86_64 is defined the special case never applies to
 * registers 8 and above, nor while x86_64_hregs is non-zero.
 */
static inline bool byte_reg_is_xH(int reg)
{
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return reg >= 4;
}
298
299 /* Select the size of a push/pop operation. */
300 static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
301 {
302 if (CODE64(s)) {
303 return ot == MO_16 ? MO_16 : MO_64;
304 } else {
305 return ot;
306 }
307 }
308
309 /* Select only size 64 else 32. Used for SSE operand sizes. */
310 static inline TCGMemOp mo_64_32(TCGMemOp ot)
311 {
312 #ifdef TARGET_X86_64
313 return ot == MO_64 ? MO_64 : MO_32;
314 #else
315 return MO_32;
316 #endif
317 }
318
319 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
320 byte vs word opcodes. */
321 static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
322 {
323 return b & 1 ? ot : MO_8;
324 }
325
326 /* Select size 8 if lsb of B is clear, else OT capped at 32.
327 Used for decoding operand size of port opcodes. */
328 static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
329 {
330 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
331 }
332
333 static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
334 {
335 switch(ot) {
336 case MO_8:
337 if (!byte_reg_is_xH(reg)) {
338 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
339 } else {
340 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
341 }
342 break;
343 case MO_16:
344 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
345 break;
346 case MO_32:
347 /* For x86_64, this sets the higher half of register to zero.
348 For i386, this is equivalent to a mov. */
349 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
350 break;
351 #ifdef TARGET_X86_64
352 case MO_64:
353 tcg_gen_mov_tl(cpu_regs[reg], t0);
354 break;
355 #endif
356 default:
357 tcg_abort();
358 }
359 }
360
361 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
362 {
363 if (ot == MO_8 && byte_reg_is_xH(reg)) {
364 tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
365 tcg_gen_ext8u_tl(t0, t0);
366 } else {
367 tcg_gen_mov_tl(t0, cpu_regs[reg]);
368 }
369 }
370
371 static inline void gen_op_movl_A0_reg(int reg)
372 {
373 tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
374 }
375
376 static inline void gen_op_addl_A0_im(int32_t val)
377 {
378 tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
379 #ifdef TARGET_X86_64
380 tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
381 #endif
382 }
383
#if defined(TARGET_X86_64)
/* A0 += VAL, with no truncation of the result. */
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif
390
391 static void gen_add_A0_im(DisasContext *s, int val)
392 {
393 #ifdef TARGET_X86_64
394 if (CODE64(s))
395 gen_op_addq_A0_im(val);
396 else
397 #endif
398 gen_op_addl_A0_im(val);
399 }
400
401 static inline void gen_op_jmp_v(TCGv dest)
402 {
403 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
404 }
405
406 static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
407 {
408 tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
409 gen_op_mov_reg_v(size, reg, cpu_tmp0);
410 }
411
412 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
413 {
414 tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
415 gen_op_mov_reg_v(size, reg, cpu_tmp0);
416 }
417
418 static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
419 {
420 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
421 if (shift != 0)
422 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
423 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
424 /* For x86_64, this sets the higher half of register to zero.
425 For i386, this is equivalent to a nop. */
426 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
427 }
428
429 static inline void gen_op_movl_A0_seg(int reg)
430 {
431 tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
432 }
433
434 static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
435 {
436 tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
437 #ifdef TARGET_X86_64
438 if (CODE64(s)) {
439 tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
440 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
441 } else {
442 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
443 tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
444 }
445 #else
446 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
447 #endif
448 }
449
450 #ifdef TARGET_X86_64
451 static inline void gen_op_movq_A0_seg(int reg)
452 {
453 tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
454 }
455
456 static inline void gen_op_addq_A0_seg(int reg)
457 {
458 tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
459 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
460 }
461
462 static inline void gen_op_movq_A0_reg(int reg)
463 {
464 tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
465 }
466
467 static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
468 {
469 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
470 if (shift != 0)
471 tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
472 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
473 }
474 #endif
475
476 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
477 {
478 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
479 }
480
481 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
482 {
483 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
484 }
485
486 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
487 {
488 if (d == OR_TMP0) {
489 gen_op_st_v(s, idx, cpu_T[0], cpu_A0);
490 } else {
491 gen_op_mov_reg_v(idx, d, cpu_T[0]);
492 }
493 }
494
495 static inline void gen_jmp_im(target_ulong pc)
496 {
497 tcg_gen_movi_tl(cpu_tmp0, pc);
498 gen_op_jmp_v(cpu_tmp0);
499 }
500
501 static inline void gen_string_movl_A0_ESI(DisasContext *s)
502 {
503 int override;
504
505 override = s->override;
506 switch (s->aflag) {
507 #ifdef TARGET_X86_64
508 case MO_64:
509 if (override >= 0) {
510 gen_op_movq_A0_seg(override);
511 gen_op_addq_A0_reg_sN(0, R_ESI);
512 } else {
513 gen_op_movq_A0_reg(R_ESI);
514 }
515 break;
516 #endif
517 case MO_32:
518 /* 32 bit address */
519 if (s->addseg && override < 0)
520 override = R_DS;
521 if (override >= 0) {
522 gen_op_movl_A0_seg(override);
523 gen_op_addl_A0_reg_sN(0, R_ESI);
524 } else {
525 gen_op_movl_A0_reg(R_ESI);
526 }
527 break;
528 case MO_16:
529 /* 16 address, always override */
530 if (override < 0)
531 override = R_DS;
532 tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESI]);
533 gen_op_addl_A0_seg(s, override);
534 break;
535 default:
536 tcg_abort();
537 }
538 }
539
540 static inline void gen_string_movl_A0_EDI(DisasContext *s)
541 {
542 switch (s->aflag) {
543 #ifdef TARGET_X86_64
544 case MO_64:
545 gen_op_movq_A0_reg(R_EDI);
546 break;
547 #endif
548 case MO_32:
549 if (s->addseg) {
550 gen_op_movl_A0_seg(R_ES);
551 gen_op_addl_A0_reg_sN(0, R_EDI);
552 } else {
553 gen_op_movl_A0_reg(R_EDI);
554 }
555 break;
556 case MO_16:
557 tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_EDI]);
558 gen_op_addl_A0_seg(s, R_ES);
559 break;
560 default:
561 tcg_abort();
562 }
563 }
564
565 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
566 {
567 tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
568 tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
569 };
570
571 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
572 {
573 switch (size) {
574 case MO_8:
575 if (sign) {
576 tcg_gen_ext8s_tl(dst, src);
577 } else {
578 tcg_gen_ext8u_tl(dst, src);
579 }
580 return dst;
581 case MO_16:
582 if (sign) {
583 tcg_gen_ext16s_tl(dst, src);
584 } else {
585 tcg_gen_ext16u_tl(dst, src);
586 }
587 return dst;
588 #ifdef TARGET_X86_64
589 case MO_32:
590 if (sign) {
591 tcg_gen_ext32s_tl(dst, src);
592 } else {
593 tcg_gen_ext32u_tl(dst, src);
594 }
595 return dst;
596 #endif
597 default:
598 return src;
599 }
600 }
601
602 static void gen_extu(TCGMemOp ot, TCGv reg)
603 {
604 gen_ext_tl(reg, reg, ot, false);
605 }
606
607 static void gen_exts(TCGMemOp ot, TCGv reg)
608 {
609 gen_ext_tl(reg, reg, ot, true);
610 }
611
612 static inline void gen_op_jnz_ecx(TCGMemOp size, int label1)
613 {
614 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
615 gen_extu(size, cpu_tmp0);
616 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
617 }
618
619 static inline void gen_op_jz_ecx(TCGMemOp size, int label1)
620 {
621 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
622 gen_extu(size, cpu_tmp0);
623 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
624 }
625
626 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
627 {
628 switch (ot) {
629 case MO_8:
630 gen_helper_inb(v, n);
631 break;
632 case MO_16:
633 gen_helper_inw(v, n);
634 break;
635 case MO_32:
636 gen_helper_inl(v, n);
637 break;
638 default:
639 tcg_abort();
640 }
641 }
642
643 static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
644 {
645 switch (ot) {
646 case MO_8:
647 gen_helper_outb(v, n);
648 break;
649 case MO_16:
650 gen_helper_outw(v, n);
651 break;
652 case MO_32:
653 gen_helper_outl(v, n);
654 break;
655 default:
656 tcg_abort();
657 }
658 }
659
660 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
661 uint32_t svm_flags)
662 {
663 int state_saved;
664 target_ulong next_eip;
665
666 state_saved = 0;
667 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
668 gen_update_cc_op(s);
669 gen_jmp_im(cur_eip);
670 state_saved = 1;
671 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
672 switch (ot) {
673 case MO_8:
674 gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
675 break;
676 case MO_16:
677 gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
678 break;
679 case MO_32:
680 gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
681 break;
682 default:
683 tcg_abort();
684 }
685 }
686 if(s->flags & HF_SVMI_MASK) {
687 if (!state_saved) {
688 gen_update_cc_op(s);
689 gen_jmp_im(cur_eip);
690 }
691 svm_flags |= (1 << (4 + ot));
692 next_eip = s->pc - s->cs_base;
693 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
694 gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
695 tcg_const_i32(svm_flags),
696 tcg_const_i32(next_eip - cur_eip));
697 }
698 }
699
700 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
701 {
702 gen_string_movl_A0_ESI(s);
703 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
704 gen_string_movl_A0_EDI(s);
705 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
706 gen_op_movl_T0_Dshift(ot);
707 gen_op_add_reg_T0(s->aflag, R_ESI);
708 gen_op_add_reg_T0(s->aflag, R_EDI);
709 }
710
711 static void gen_op_update1_cc(void)
712 {
713 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
714 }
715
716 static void gen_op_update2_cc(void)
717 {
718 tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
719 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
720 }
721
722 static void gen_op_update3_cc(TCGv reg)
723 {
724 tcg_gen_mov_tl(cpu_cc_src2, reg);
725 tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
726 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
727 }
728
729 static inline void gen_op_testl_T0_T1_cc(void)
730 {
731 tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
732 }
733
734 static void gen_op_update_neg_cc(void)
735 {
736 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
737 tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
738 tcg_gen_movi_tl(cpu_cc_srcT, 0);
739 }
740
741 /* compute all eflags to cc_src */
742 static void gen_compute_eflags(DisasContext *s)
743 {
744 TCGv zero, dst, src1, src2;
745 int live, dead;
746
747 if (s->cc_op == CC_OP_EFLAGS) {
748 return;
749 }
750 if (s->cc_op == CC_OP_CLR) {
751 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
752 set_cc_op(s, CC_OP_EFLAGS);
753 return;
754 }
755
756 TCGV_UNUSED(zero);
757 dst = cpu_cc_dst;
758 src1 = cpu_cc_src;
759 src2 = cpu_cc_src2;
760
761 /* Take care to not read values that are not live. */
762 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
763 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
764 if (dead) {
765 zero = tcg_const_tl(0);
766 if (dead & USES_CC_DST) {
767 dst = zero;
768 }
769 if (dead & USES_CC_SRC) {
770 src1 = zero;
771 }
772 if (dead & USES_CC_SRC2) {
773 src2 = zero;
774 }
775 }
776
777 gen_update_cc_op(s);
778 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
779 set_cc_op(s, CC_OP_EFLAGS);
780
781 if (dead) {
782 tcg_temp_free(zero);
783 }
784 }
785
786 typedef struct CCPrepare {
787 TCGCond cond;
788 TCGv reg;
789 TCGv reg2;
790 target_ulong imm;
791 target_ulong mask;
792 bool use_reg2;
793 bool no_setcond;
794 } CCPrepare;
795
796 /* compute eflags.C to reg */
797 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
798 {
799 TCGv t0, t1;
800 int size, shift;
801
802 switch (s->cc_op) {
803 case CC_OP_SUBB ... CC_OP_SUBQ:
804 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
805 size = s->cc_op - CC_OP_SUBB;
806 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
807 /* If no temporary was used, be careful not to alias t1 and t0. */
808 t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
809 tcg_gen_mov_tl(t0, cpu_cc_srcT);
810 gen_extu(size, t0);
811 goto add_sub;
812
813 case CC_OP_ADDB ... CC_OP_ADDQ:
814 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
815 size = s->cc_op - CC_OP_ADDB;
816 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
817 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
818 add_sub:
819 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
820 .reg2 = t1, .mask = -1, .use_reg2 = true };
821
822 case CC_OP_LOGICB ... CC_OP_LOGICQ:
823 case CC_OP_CLR:
824 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
825
826 case CC_OP_INCB ... CC_OP_INCQ:
827 case CC_OP_DECB ... CC_OP_DECQ:
828 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
829 .mask = -1, .no_setcond = true };
830
831 case CC_OP_SHLB ... CC_OP_SHLQ:
832 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
833 size = s->cc_op - CC_OP_SHLB;
834 shift = (8 << size) - 1;
835 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
836 .mask = (target_ulong)1 << shift };
837
838 case CC_OP_MULB ... CC_OP_MULQ:
839 return (CCPrepare) { .cond = TCG_COND_NE,
840 .reg = cpu_cc_src, .mask = -1 };
841
842 case CC_OP_BMILGB ... CC_OP_BMILGQ:
843 size = s->cc_op - CC_OP_BMILGB;
844 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
845 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
846
847 case CC_OP_ADCX:
848 case CC_OP_ADCOX:
849 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
850 .mask = -1, .no_setcond = true };
851
852 case CC_OP_EFLAGS:
853 case CC_OP_SARB ... CC_OP_SARQ:
854 /* CC_SRC & 1 */
855 return (CCPrepare) { .cond = TCG_COND_NE,
856 .reg = cpu_cc_src, .mask = CC_C };
857
858 default:
859 /* The need to compute only C from CC_OP_DYNAMIC is important
860 in efficiently implementing e.g. INC at the start of a TB. */
861 gen_update_cc_op(s);
862 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
863 cpu_cc_src2, cpu_cc_op);
864 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
865 .mask = -1, .no_setcond = true };
866 }
867 }
868
869 /* compute eflags.P to reg */
870 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
871 {
872 gen_compute_eflags(s);
873 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
874 .mask = CC_P };
875 }
876
877 /* compute eflags.S to reg */
878 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
879 {
880 switch (s->cc_op) {
881 case CC_OP_DYNAMIC:
882 gen_compute_eflags(s);
883 /* FALLTHRU */
884 case CC_OP_EFLAGS:
885 case CC_OP_ADCX:
886 case CC_OP_ADOX:
887 case CC_OP_ADCOX:
888 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
889 .mask = CC_S };
890 case CC_OP_CLR:
891 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
892 default:
893 {
894 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
895 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
896 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
897 }
898 }
899 }
900
901 /* compute eflags.O to reg */
902 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
903 {
904 switch (s->cc_op) {
905 case CC_OP_ADOX:
906 case CC_OP_ADCOX:
907 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
908 .mask = -1, .no_setcond = true };
909 case CC_OP_CLR:
910 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
911 default:
912 gen_compute_eflags(s);
913 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
914 .mask = CC_O };
915 }
916 }
917
918 /* compute eflags.Z to reg */
919 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
920 {
921 switch (s->cc_op) {
922 case CC_OP_DYNAMIC:
923 gen_compute_eflags(s);
924 /* FALLTHRU */
925 case CC_OP_EFLAGS:
926 case CC_OP_ADCX:
927 case CC_OP_ADOX:
928 case CC_OP_ADCOX:
929 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
930 .mask = CC_Z };
931 case CC_OP_CLR:
932 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
933 default:
934 {
935 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
936 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
937 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
938 }
939 }
940 }
941
942 /* perform a conditional store into register 'reg' according to jump opcode
943 value 'b'. In the fast case, T0 is guaranted not to be used. */
944 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
945 {
946 int inv, jcc_op, cond;
947 TCGMemOp size;
948 CCPrepare cc;
949 TCGv t0;
950
951 inv = b & 1;
952 jcc_op = (b >> 1) & 7;
953
954 switch (s->cc_op) {
955 case CC_OP_SUBB ... CC_OP_SUBQ:
956 /* We optimize relational operators for the cmp/jcc case. */
957 size = s->cc_op - CC_OP_SUBB;
958 switch (jcc_op) {
959 case JCC_BE:
960 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
961 gen_extu(size, cpu_tmp4);
962 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
963 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
964 .reg2 = t0, .mask = -1, .use_reg2 = true };
965 break;
966
967 case JCC_L:
968 cond = TCG_COND_LT;
969 goto fast_jcc_l;
970 case JCC_LE:
971 cond = TCG_COND_LE;
972 fast_jcc_l:
973 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
974 gen_exts(size, cpu_tmp4);
975 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
976 cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
977 .reg2 = t0, .mask = -1, .use_reg2 = true };
978 break;
979
980 default:
981 goto slow_jcc;
982 }
983 break;
984
985 default:
986 slow_jcc:
987 /* This actually generates good code for JC, JZ and JS. */
988 switch (jcc_op) {
989 case JCC_O:
990 cc = gen_prepare_eflags_o(s, reg);
991 break;
992 case JCC_B:
993 cc = gen_prepare_eflags_c(s, reg);
994 break;
995 case JCC_Z:
996 cc = gen_prepare_eflags_z(s, reg);
997 break;
998 case JCC_BE:
999 gen_compute_eflags(s);
1000 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1001 .mask = CC_Z | CC_C };
1002 break;
1003 case JCC_S:
1004 cc = gen_prepare_eflags_s(s, reg);
1005 break;
1006 case JCC_P:
1007 cc = gen_prepare_eflags_p(s, reg);
1008 break;
1009 case JCC_L:
1010 gen_compute_eflags(s);
1011 if (TCGV_EQUAL(reg, cpu_cc_src)) {
1012 reg = cpu_tmp0;
1013 }
1014 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1015 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1016 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1017 .mask = CC_S };
1018 break;
1019 default:
1020 case JCC_LE:
1021 gen_compute_eflags(s);
1022 if (TCGV_EQUAL(reg, cpu_cc_src)) {
1023 reg = cpu_tmp0;
1024 }
1025 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1026 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1027 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1028 .mask = CC_S | CC_Z };
1029 break;
1030 }
1031 break;
1032 }
1033
1034 if (inv) {
1035 cc.cond = tcg_invert_cond(cc.cond);
1036 }
1037 return cc;
1038 }
1039
1040 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1041 {
1042 CCPrepare cc = gen_prepare_cc(s, b, reg);
1043
1044 if (cc.no_setcond) {
1045 if (cc.cond == TCG_COND_EQ) {
1046 tcg_gen_xori_tl(reg, cc.reg, 1);
1047 } else {
1048 tcg_gen_mov_tl(reg, cc.reg);
1049 }
1050 return;
1051 }
1052
1053 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1054 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1055 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1056 tcg_gen_andi_tl(reg, reg, 1);
1057 return;
1058 }
1059 if (cc.mask != -1) {
1060 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1061 cc.reg = reg;
1062 }
1063 if (cc.use_reg2) {
1064 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1065 } else {
1066 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1067 }
1068 }
1069
1070 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1071 {
1072 gen_setcc1(s, JCC_B << 1, reg);
1073 }
1074
1075 /* generate a conditional jump to label 'l1' according to jump opcode
1076 value 'b'. In the fast case, T0 is guaranted not to be used. */
1077 static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1)
1078 {
1079 CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
1080
1081 if (cc.mask != -1) {
1082 tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
1083 cc.reg = cpu_T[0];
1084 }
1085 if (cc.use_reg2) {
1086 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1087 } else {
1088 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1089 }
1090 }
1091
1092 /* Generate a conditional jump to label 'l1' according to jump opcode
1093 value 'b'. In the fast case, T0 is guaranted not to be used.
1094 A translation block must end soon. */
1095 static inline void gen_jcc1(DisasContext *s, int b, int l1)
1096 {
1097 CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
1098
1099 gen_update_cc_op(s);
1100 if (cc.mask != -1) {
1101 tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
1102 cc.reg = cpu_T[0];
1103 }
1104 set_cc_op(s, CC_OP_DYNAMIC);
1105 if (cc.use_reg2) {
1106 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1107 } else {
1108 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1109 }
1110 }
1111
1112 /* XXX: does not work with gdbstub "ice" single step - not a
1113 serious problem */
1114 static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1115 {
1116 int l1, l2;
1117
1118 l1 = gen_new_label();
1119 l2 = gen_new_label();
1120 gen_op_jnz_ecx(s->aflag, l1);
1121 gen_set_label(l2);
1122 gen_jmp_tb(s, next_eip, 1);
1123 gen_set_label(l1);
1124 return l2;
1125 }
1126
1127 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1128 {
1129 gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
1130 gen_string_movl_A0_EDI(s);
1131 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
1132 gen_op_movl_T0_Dshift(ot);
1133 gen_op_add_reg_T0(s->aflag, R_EDI);
1134 }
1135
1136 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1137 {
1138 gen_string_movl_A0_ESI(s);
1139 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1140 gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
1141 gen_op_movl_T0_Dshift(ot);
1142 gen_op_add_reg_T0(s->aflag, R_ESI);
1143 }
1144
1145 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1146 {
1147 gen_string_movl_A0_EDI(s);
1148 gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
1149 gen_op(s, OP_CMPL, ot, R_EAX);
1150 gen_op_movl_T0_Dshift(ot);
1151 gen_op_add_reg_T0(s->aflag, R_EDI);
1152 }
1153
1154 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1155 {
1156 gen_string_movl_A0_EDI(s);
1157 gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
1158 gen_string_movl_A0_ESI(s);
1159 gen_op(s, OP_CMPL, ot, OR_TMP0);
1160 gen_op_movl_T0_Dshift(ot);
1161 gen_op_add_reg_T0(s->aflag, R_ESI);
1162 gen_op_add_reg_T0(s->aflag, R_EDI);
1163 }
1164
1165 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1166 {
1167 if (use_icount)
1168 gen_io_start();
1169 gen_string_movl_A0_EDI(s);
1170 /* Note: we must do this dummy write first to be restartable in
1171 case of page fault. */
1172 tcg_gen_movi_tl(cpu_T[0], 0);
1173 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
1174 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1175 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1176 gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
1177 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
1178 gen_op_movl_T0_Dshift(ot);
1179 gen_op_add_reg_T0(s->aflag, R_EDI);
1180 if (use_icount)
1181 gen_io_end();
1182 }
1183
1184 static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1185 {
1186 if (use_icount)
1187 gen_io_start();
1188 gen_string_movl_A0_ESI(s);
1189 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1190
1191 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1192 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1193 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
1194 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1195
1196 gen_op_movl_T0_Dshift(ot);
1197 gen_op_add_reg_T0(s->aflag, R_ESI);
1198 if (use_icount)
1199 gen_io_end();
1200 }
1201
/* Same method as Valgrind: a REP string instruction is translated into
   one iteration followed by a jump to the current or to the next
   instruction.  GEN_REPZ(op) defines gen_repz_<op>() around gen_<op>(). */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip)                     \
{                                                                             \
    int exit_label;                                                           \
                                                                              \
    gen_update_cc_op(s);                                                      \
    exit_label = gen_jz_ecx_string(s, next_eip);                              \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* Looping back would raise two single step exceptions when ECX was 1    \
       before the rep string insn, so test ECX again when !jmp_opt. */       \
    if (!s->jmp_opt) {                                                        \
        gen_op_jz_ecx(s->aflag, exit_label);                                  \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1219
/* Variant of GEN_REPZ for string instructions whose repetition also
   depends on ZF: after each iteration a conditional jump built from
   JCC_Z and 'nz' leaves the loop through the exit label. */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int exit_label;                                                           \
                                                                              \
    gen_update_cc_op(s);                                                      \
    exit_label = gen_jz_ecx_string(s, next_eip);                              \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), exit_label);                         \
    if (!s->jmp_opt) {                                                        \
        gen_op_jz_ecx(s->aflag, exit_label);                                  \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1237
1238 GEN_REPZ(movs)
1239 GEN_REPZ(stos)
1240 GEN_REPZ(lods)
1241 GEN_REPZ(ins)
1242 GEN_REPZ(outs)
1243 GEN_REPZ2(scas)
1244 GEN_REPZ2(cmps)
1245
1246 static void gen_helper_fp_arith_ST0_FT0(int op)
1247 {
1248 switch (op) {
1249 case 0:
1250 gen_helper_fadd_ST0_FT0(cpu_env);
1251 break;
1252 case 1:
1253 gen_helper_fmul_ST0_FT0(cpu_env);
1254 break;
1255 case 2:
1256 gen_helper_fcom_ST0_FT0(cpu_env);
1257 break;
1258 case 3:
1259 gen_helper_fcom_ST0_FT0(cpu_env);
1260 break;
1261 case 4:
1262 gen_helper_fsub_ST0_FT0(cpu_env);
1263 break;
1264 case 5:
1265 gen_helper_fsubr_ST0_FT0(cpu_env);
1266 break;
1267 case 6:
1268 gen_helper_fdiv_ST0_FT0(cpu_env);
1269 break;
1270 case 7:
1271 gen_helper_fdivr_ST0_FT0(cpu_env);
1272 break;
1273 }
1274 }
1275
1276 /* NOTE the exception in "r" op ordering */
1277 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1278 {
1279 TCGv_i32 tmp = tcg_const_i32(opreg);
1280 switch (op) {
1281 case 0:
1282 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1283 break;
1284 case 1:
1285 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1286 break;
1287 case 4:
1288 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1289 break;
1290 case 5:
1291 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1292 break;
1293 case 6:
1294 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1295 break;
1296 case 7:
1297 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1298 break;
1299 }
1300 }
1301
1302 /* if d == OR_TMP0, it means memory operand (address in A0) */
1303 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1304 {
1305 if (d != OR_TMP0) {
1306 gen_op_mov_v_reg(ot, cpu_T[0], d);
1307 } else {
1308 gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
1309 }
1310 switch(op) {
1311 case OP_ADCL:
1312 gen_compute_eflags_c(s1, cpu_tmp4);
1313 tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1314 tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
1315 gen_op_st_rm_T0_A0(s1, ot, d);
1316 gen_op_update3_cc(cpu_tmp4);
1317 set_cc_op(s1, CC_OP_ADCB + ot);
1318 break;
1319 case OP_SBBL:
1320 gen_compute_eflags_c(s1, cpu_tmp4);
1321 tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1322 tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
1323 gen_op_st_rm_T0_A0(s1, ot, d);
1324 gen_op_update3_cc(cpu_tmp4);
1325 set_cc_op(s1, CC_OP_SBBB + ot);
1326 break;
1327 case OP_ADDL:
1328 tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1329 gen_op_st_rm_T0_A0(s1, ot, d);
1330 gen_op_update2_cc();
1331 set_cc_op(s1, CC_OP_ADDB + ot);
1332 break;
1333 case OP_SUBL:
1334 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
1335 tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1336 gen_op_st_rm_T0_A0(s1, ot, d);
1337 gen_op_update2_cc();
1338 set_cc_op(s1, CC_OP_SUBB + ot);
1339 break;
1340 default:
1341 case OP_ANDL:
1342 tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1343 gen_op_st_rm_T0_A0(s1, ot, d);
1344 gen_op_update1_cc();
1345 set_cc_op(s1, CC_OP_LOGICB + ot);
1346 break;
1347 case OP_ORL:
1348 tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1349 gen_op_st_rm_T0_A0(s1, ot, d);
1350 gen_op_update1_cc();
1351 set_cc_op(s1, CC_OP_LOGICB + ot);
1352 break;
1353 case OP_XORL:
1354 tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1355 gen_op_st_rm_T0_A0(s1, ot, d);
1356 gen_op_update1_cc();
1357 set_cc_op(s1, CC_OP_LOGICB + ot);
1358 break;
1359 case OP_CMPL:
1360 tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
1361 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
1362 tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
1363 set_cc_op(s1, CC_OP_SUBB + ot);
1364 break;
1365 }
1366 }
1367
1368 /* if d == OR_TMP0, it means memory operand (address in A0) */
1369 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1370 {
1371 if (d != OR_TMP0) {
1372 gen_op_mov_v_reg(ot, cpu_T[0], d);
1373 } else {
1374 gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
1375 }
1376 gen_compute_eflags_c(s1, cpu_cc_src);
1377 if (c > 0) {
1378 tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
1379 set_cc_op(s1, CC_OP_INCB + ot);
1380 } else {
1381 tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
1382 set_cc_op(s1, CC_OP_DECB + ot);
1383 }
1384 gen_op_st_rm_T0_A0(s1, ot, d);
1385 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1386 }
1387
1388 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1389 TCGv shm1, TCGv count, bool is_right)
1390 {
1391 TCGv_i32 z32, s32, oldop;
1392 TCGv z_tl;
1393
1394 /* Store the results into the CC variables. If we know that the
1395 variable must be dead, store unconditionally. Otherwise we'll
1396 need to not disrupt the current contents. */
1397 z_tl = tcg_const_tl(0);
1398 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1399 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1400 result, cpu_cc_dst);
1401 } else {
1402 tcg_gen_mov_tl(cpu_cc_dst, result);
1403 }
1404 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1405 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1406 shm1, cpu_cc_src);
1407 } else {
1408 tcg_gen_mov_tl(cpu_cc_src, shm1);
1409 }
1410 tcg_temp_free(z_tl);
1411
1412 /* Get the two potential CC_OP values into temporaries. */
1413 tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1414 if (s->cc_op == CC_OP_DYNAMIC) {
1415 oldop = cpu_cc_op;
1416 } else {
1417 tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1418 oldop = cpu_tmp3_i32;
1419 }
1420
1421 /* Conditionally store the CC_OP value. */
1422 z32 = tcg_const_i32(0);
1423 s32 = tcg_temp_new_i32();
1424 tcg_gen_trunc_tl_i32(s32, count);
1425 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1426 tcg_temp_free_i32(z32);
1427 tcg_temp_free_i32(s32);
1428
1429 /* The CC_OP value is no longer predictable. */
1430 set_cc_op(s, CC_OP_DYNAMIC);
1431 }
1432
1433 static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1434 int is_right, int is_arith)
1435 {
1436 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1437
1438 /* load */
1439 if (op1 == OR_TMP0) {
1440 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1441 } else {
1442 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1443 }
1444
1445 tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
1446 tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);
1447
1448 if (is_right) {
1449 if (is_arith) {
1450 gen_exts(ot, cpu_T[0]);
1451 tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1452 tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1453 } else {
1454 gen_extu(ot, cpu_T[0]);
1455 tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1456 tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1457 }
1458 } else {
1459 tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1460 tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1461 }
1462
1463 /* store */
1464 gen_op_st_rm_T0_A0(s, ot, op1);
1465
1466 gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
1467 }
1468
1469 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1470 int is_right, int is_arith)
1471 {
1472 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1473
1474 /* load */
1475 if (op1 == OR_TMP0)
1476 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1477 else
1478 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1479
1480 op2 &= mask;
1481 if (op2 != 0) {
1482 if (is_right) {
1483 if (is_arith) {
1484 gen_exts(ot, cpu_T[0]);
1485 tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
1486 tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
1487 } else {
1488 gen_extu(ot, cpu_T[0]);
1489 tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
1490 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
1491 }
1492 } else {
1493 tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
1494 tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
1495 }
1496 }
1497
1498 /* store */
1499 gen_op_st_rm_T0_A0(s, ot, op1);
1500
1501 /* update eflags if non zero shift */
1502 if (op2 != 0) {
1503 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1504 tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
1505 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1506 }
1507 }
1508
1509 static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
1510 {
1511 if (arg2 >= 0)
1512 tcg_gen_shli_tl(ret, arg1, arg2);
1513 else
1514 tcg_gen_shri_tl(ret, arg1, -arg2);
1515 }
1516
1517 static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1518 {
1519 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1520 TCGv_i32 t0, t1;
1521
1522 /* load */
1523 if (op1 == OR_TMP0) {
1524 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1525 } else {
1526 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1527 }
1528
1529 tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
1530
1531 switch (ot) {
1532 case MO_8:
1533 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1534 tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
1535 tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
1536 goto do_long;
1537 case MO_16:
1538 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1539 tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
1540 goto do_long;
1541 do_long:
1542 #ifdef TARGET_X86_64
1543 case MO_32:
1544 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
1545 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
1546 if (is_right) {
1547 tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1548 } else {
1549 tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1550 }
1551 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
1552 break;
1553 #endif
1554 default:
1555 if (is_right) {
1556 tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1557 } else {
1558 tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1559 }
1560 break;
1561 }
1562
1563 /* store */
1564 gen_op_st_rm_T0_A0(s, ot, op1);
1565
1566 /* We'll need the flags computed into CC_SRC. */
1567 gen_compute_eflags(s);
1568
1569 /* The value that was "rotated out" is now present at the other end
1570 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1571 since we've computed the flags into CC_SRC, these variables are
1572 currently dead. */
1573 if (is_right) {
1574 tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
1575 tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
1576 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1577 } else {
1578 tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
1579 tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
1580 }
1581 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1582 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1583
1584 /* Now conditionally store the new CC_OP value. If the shift count
1585 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1586 Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
1587 exactly as we computed above. */
1588 t0 = tcg_const_i32(0);
1589 t1 = tcg_temp_new_i32();
1590 tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
1591 tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
1592 tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1593 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1594 cpu_tmp2_i32, cpu_tmp3_i32);
1595 tcg_temp_free_i32(t0);
1596 tcg_temp_free_i32(t1);
1597
1598 /* The CC_OP value is no longer predictable. */
1599 set_cc_op(s, CC_OP_DYNAMIC);
1600 }
1601
1602 static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1603 int is_right)
1604 {
1605 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1606 int shift;
1607
1608 /* load */
1609 if (op1 == OR_TMP0) {
1610 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1611 } else {
1612 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1613 }
1614
1615 op2 &= mask;
1616 if (op2 != 0) {
1617 switch (ot) {
1618 #ifdef TARGET_X86_64
1619 case MO_32:
1620 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
1621 if (is_right) {
1622 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1623 } else {
1624 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1625 }
1626 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
1627 break;
1628 #endif
1629 default:
1630 if (is_right) {
1631 tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
1632 } else {
1633 tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
1634 }
1635 break;
1636 case MO_8:
1637 mask = 7;
1638 goto do_shifts;
1639 case MO_16:
1640 mask = 15;
1641 do_shifts:
1642 shift = op2 & mask;
1643 if (is_right) {
1644 shift = mask + 1 - shift;
1645 }
1646 gen_extu(ot, cpu_T[0]);
1647 tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
1648 tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
1649 tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
1650 break;
1651 }
1652 }
1653
1654 /* store */
1655 gen_op_st_rm_T0_A0(s, ot, op1);
1656
1657 if (op2 != 0) {
1658 /* Compute the flags into CC_SRC. */
1659 gen_compute_eflags(s);
1660
1661 /* The value that was "rotated out" is now present at the other end
1662 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1663 since we've computed the flags into CC_SRC, these variables are
1664 currently dead. */
1665 if (is_right) {
1666 tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
1667 tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
1668 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1669 } else {
1670 tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
1671 tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
1672 }
1673 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1674 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1675 set_cc_op(s, CC_OP_ADCOX);
1676 }
1677 }
1678
1679 /* XXX: add faster immediate = 1 case */
1680 static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1681 int is_right)
1682 {
1683 gen_compute_eflags(s);
1684 assert(s->cc_op == CC_OP_EFLAGS);
1685
1686 /* load */
1687 if (op1 == OR_TMP0)
1688 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1689 else
1690 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1691
1692 if (is_right) {
1693 switch (ot) {
1694 case MO_8:
1695 gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1696 break;
1697 case MO_16:
1698 gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1699 break;
1700 case MO_32:
1701 gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1702 break;
1703 #ifdef TARGET_X86_64
1704 case MO_64:
1705 gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1706 break;
1707 #endif
1708 default:
1709 tcg_abort();
1710 }
1711 } else {
1712 switch (ot) {
1713 case MO_8:
1714 gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1715 break;
1716 case MO_16:
1717 gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1718 break;
1719 case MO_32:
1720 gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1721 break;
1722 #ifdef TARGET_X86_64
1723 case MO_64:
1724 gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1725 break;
1726 #endif
1727 default:
1728 tcg_abort();
1729 }
1730 }
1731 /* store */
1732 gen_op_st_rm_T0_A0(s, ot, op1);
1733 }
1734
1735 /* XXX: add faster immediate case */
1736 static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1737 bool is_right, TCGv count_in)
1738 {
1739 target_ulong mask = (ot == MO_64 ? 63 : 31);
1740 TCGv count;
1741
1742 /* load */
1743 if (op1 == OR_TMP0) {
1744 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1745 } else {
1746 gen_op_mov_v_reg(ot, cpu_T[0], op1);
1747 }
1748
1749 count = tcg_temp_new();
1750 tcg_gen_andi_tl(count, count_in, mask);
1751
1752 switch (ot) {
1753 case MO_16:
1754 /* Note: we implement the Intel behaviour for shift count > 16.
1755 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1756 portion by constructing it as a 32-bit value. */
1757 if (is_right) {
1758 tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
1759 tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
1760 tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
1761 } else {
1762 tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
1763 }
1764 /* FALLTHRU */
1765 #ifdef TARGET_X86_64
1766 case MO_32:
1767 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1768 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1769 if (is_right) {
1770 tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
1771 tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1772 tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
1773 } else {
1774 tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
1775 tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1776 tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
1777 tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1778 tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
1779 }
1780 break;
1781 #endif
1782 default:
1783 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1784 if (is_right) {
1785 tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1786
1787 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1788 tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
1789 tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1790 } else {
1791 tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1792 if (ot == MO_16) {
1793 /* Only needed if count > 16, for Intel behaviour. */
1794 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1795 tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
1796 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1797 }
1798
1799 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1800 tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
1801 tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1802 }
1803 tcg_gen_movi_tl(cpu_tmp4, 0);
1804 tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
1805 cpu_tmp4, cpu_T[1]);
1806 tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1807 break;
1808 }
1809
1810 /* store */
1811 gen_op_st_rm_T0_A0(s, ot, op1);
1812
1813 gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
1814 tcg_temp_free(count);
1815 }
1816
1817 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1818 {
1819 if (s != OR_TMP1)
1820 gen_op_mov_v_reg(ot, cpu_T[1], s);
1821 switch(op) {
1822 case OP_ROL:
1823 gen_rot_rm_T1(s1, ot, d, 0);
1824 break;
1825 case OP_ROR:
1826 gen_rot_rm_T1(s1, ot, d, 1);
1827 break;
1828 case OP_SHL:
1829 case OP_SHL1:
1830 gen_shift_rm_T1(s1, ot, d, 0, 0);
1831 break;
1832 case OP_SHR:
1833 gen_shift_rm_T1(s1, ot, d, 1, 0);
1834 break;
1835 case OP_SAR:
1836 gen_shift_rm_T1(s1, ot, d, 1, 1);
1837 break;
1838 case OP_RCL:
1839 gen_rotc_rm_T1(s1, ot, d, 0);
1840 break;
1841 case OP_RCR:
1842 gen_rotc_rm_T1(s1, ot, d, 1);
1843 break;
1844 }
1845 }
1846
1847 static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1848 {
1849 switch(op) {
1850 case OP_ROL:
1851 gen_rot_rm_im(s1, ot, d, c, 0);
1852 break;
1853 case OP_ROR:
1854 gen_rot_rm_im(s1, ot, d, c, 1);
1855 break;
1856 case OP_SHL:
1857 case OP_SHL1:
1858 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1859 break;
1860 case OP_SHR:
1861 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1862 break;
1863 case OP_SAR:
1864 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1865 break;
1866 default:
1867 /* currently not optimized */
1868 tcg_gen_movi_tl(cpu_T[1], c);
1869 gen_shift(s1, op, ot, d, OR_TMP1);
1870 break;
1871 }
1872 }
1873
1874 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1875 {
1876 target_long disp;
1877 int havesib;
1878 int base;
1879 int index;
1880 int scale;
1881 int mod, rm, code, override, must_add_seg;
1882 TCGv sum;
1883
1884 override = s->override;
1885 must_add_seg = s->addseg;
1886 if (override >= 0)
1887 must_add_seg = 1;
1888 mod = (modrm >> 6) & 3;
1889 rm = modrm & 7;
1890
1891 switch (s->aflag) {
1892 case MO_64:
1893 case MO_32:
1894 havesib = 0;
1895 base = rm;
1896 index = -1;
1897 scale = 0;
1898
1899 if (base == 4) {
1900 havesib = 1;
1901 code = cpu_ldub_code(env, s->pc++);
1902 scale = (code >> 6) & 3;
1903 index = ((code >> 3) & 7) | REX_X(s);
1904 if (index == 4) {
1905 index = -1; /* no index */
1906 }
1907 base = (code & 7);
1908 }
1909 base |= REX_B(s);
1910
1911 switch (mod) {
1912 case 0:
1913 if ((base & 7) == 5) {
1914 base = -1;
1915 disp = (int32_t)cpu_ldl_code(env, s->pc);
1916 s->pc += 4;
1917 if (CODE64(s) && !havesib) {
1918 disp += s->pc + s->rip_offset;
1919 }
1920 } else {
1921 disp = 0;
1922 }
1923 break;
1924 case 1:
1925 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1926 break;
1927 default:
1928 case 2:
1929 disp = (int32_t)cpu_ldl_code(env, s->pc);
1930 s->pc += 4;
1931 break;
1932 }
1933
1934 /* For correct popl handling with esp. */
1935 if (base == R_ESP && s->popl_esp_hack) {
1936 disp += s->popl_esp_hack;
1937 }
1938
1939 /* Compute the address, with a minimum number of TCG ops. */
1940 TCGV_UNUSED(sum);
1941 if (index >= 0) {
1942 if (scale == 0) {
1943 sum = cpu_regs[index];
1944 } else {
1945 tcg_gen_shli_tl(cpu_A0, cpu_regs[index], scale);
1946 sum = cpu_A0;
1947 }
1948 if (base >= 0) {
1949 tcg_gen_add_tl(cpu_A0, sum, cpu_regs[base]);
1950 sum = cpu_A0;
1951 }
1952 } else if (base >= 0) {
1953 sum = cpu_regs[base];
1954 }
1955 if (TCGV_IS_UNUSED(sum)) {
1956 tcg_gen_movi_tl(cpu_A0, disp);
1957 } else {
1958 tcg_gen_addi_tl(cpu_A0, sum, disp);
1959 }
1960
1961 if (must_add_seg) {
1962 if (override < 0) {
1963 if (base == R_EBP || base == R_ESP) {
1964 override = R_SS;
1965 } else {
1966 override = R_DS;
1967 }
1968 }
1969
1970 tcg_gen_ld_tl(cpu_tmp0, cpu_env,
1971 offsetof(CPUX86State, segs[override].base));
1972 if (CODE64(s)) {
1973 if (s->aflag == MO_32) {
1974 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1975 }
1976 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1977 return;
1978 }
1979
1980 tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1981 }
1982
1983 if (s->aflag == MO_32) {
1984 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1985 }
1986 break;
1987
1988 case MO_16:
1989 switch (mod) {
1990 case 0:
1991 if (rm == 6) {
1992 disp = cpu_lduw_code(env, s->pc);
1993 s->pc += 2;
1994 tcg_gen_movi_tl(cpu_A0, disp);
1995 rm = 0; /* avoid SS override */
1996 goto no_rm;
1997 } else {
1998 disp = 0;
1999 }
2000 break;
2001 case 1:
2002 disp = (int8_t)cpu_ldub_code(env, s->pc++);
2003 break;
2004 default:
2005 case 2:
2006 disp = (int16_t)cpu_lduw_code(env, s->pc);
2007 s->pc += 2;
2008 break;
2009 }
2010
2011 sum = cpu_A0;
2012 switch (rm) {
2013 case 0:
2014 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
2015 break;
2016 case 1:
2017 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
2018 break;
2019 case 2:
2020 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
2021 break;
2022 case 3:
2023 tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
2024 break;
2025 case 4:
2026 sum = cpu_regs[R_ESI];
2027 break;
2028 case 5:
2029 sum = cpu_regs[R_EDI];
2030 break;
2031 case 6:
2032 sum = cpu_regs[R_EBP];
2033 break;
2034 default:
2035 case 7:
2036 sum = cpu_regs[R_EBX];
2037 break;
2038 }
2039 tcg_gen_addi_tl(cpu_A0, sum, disp);
2040 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2041 no_rm:
2042 if (must_add_seg) {
2043 if (override < 0) {
2044 if (rm == 2 || rm == 3 || rm == 6) {
2045 override = R_SS;
2046 } else {
2047 override = R_DS;
2048 }
2049 }
2050 gen_op_addl_A0_seg(s, override);
2051 }
2052 break;
2053
2054 default:
2055 tcg_abort();
2056 }
2057 }
2058
2059 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2060 {
2061 int mod, rm, base, code;
2062
2063 mod = (modrm >> 6) & 3;
2064 if (mod == 3)
2065 return;
2066 rm = modrm & 7;
2067
2068 switch (s->aflag) {
2069 case MO_64:
2070 case MO_32:
2071 base = rm;
2072
2073 if (base == 4) {
2074 code = cpu_ldub_code(env, s->pc++);
2075 base = (code & 7);
2076 }
2077
2078 switch (mod) {
2079 case 0:
2080 if (base == 5) {
2081 s->pc += 4;
2082 }
2083 break;
2084 case 1:
2085 s->pc++;
2086 break;
2087 default:
2088 case 2:
2089 s->pc += 4;
2090 break;
2091 }
2092 break;
2093
2094 case MO_16:
2095 switch (mod) {
2096 case 0:
2097 if (rm == 6) {
2098 s->pc += 2;
2099 }
2100 break;
2101 case 1:
2102 s->pc++;
2103 break;
2104 default:
2105 case 2:
2106 s->pc += 2;
2107 break;
2108 }
2109 break;
2110
2111 default:
2112 tcg_abort();
2113 }
2114 }
2115
2116 /* used for LEA and MOV AX, mem */
2117 static void gen_add_A0_ds_seg(DisasContext *s)
2118 {
2119 int override, must_add_seg;
2120 must_add_seg = s->addseg;
2121 override = R_DS;
2122 if (s->override >= 0) {
2123 override = s->override;
2124 must_add_seg = 1;
2125 }
2126 if (must_add_seg) {
2127 #ifdef TARGET_X86_64
2128 if (CODE64(s)) {
2129 gen_op_addq_A0_seg(override);
2130 } else
2131 #endif
2132 {
2133 gen_op_addl_A0_seg(s, override);
2134 }
2135 }
2136 }
2137
2138 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2139 OR_TMP0 */
2140 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2141 TCGMemOp ot, int reg, int is_store)
2142 {
2143 int mod, rm;
2144
2145 mod = (modrm >> 6) & 3;
2146 rm = (modrm & 7) | REX_B(s);
2147 if (mod == 3) {
2148 if (is_store) {
2149 if (reg != OR_TMP0)
2150 gen_op_mov_v_reg(ot, cpu_T[0], reg);
2151 gen_op_mov_reg_v(ot, rm, cpu_T[0]);
2152 } else {
2153 gen_op_mov_v_reg(ot, cpu_T[0], rm);
2154 if (reg != OR_TMP0)
2155 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2156 }
2157 } else {
2158 gen_lea_modrm(env, s, modrm);
2159 if (is_store) {
2160 if (reg != OR_TMP0)
2161 gen_op_mov_v_reg(ot, cpu_T[0], reg);
2162 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2163 } else {
2164 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
2165 if (reg != OR_TMP0)
2166 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2167 }
2168 }
2169 }
2170
2171 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2172 {
2173 uint32_t ret;
2174
2175 switch (ot) {
2176 case MO_8:
2177 ret = cpu_ldub_code(env, s->pc);
2178 s->pc++;
2179 break;
2180 case MO_16:
2181 ret = cpu_lduw_code(env, s->pc);
2182 s->pc += 2;
2183 break;
2184 case MO_32:
2185 #ifdef TARGET_X86_64
2186 case MO_64:
2187 #endif
2188 ret = cpu_ldl_code(env, s->pc);
2189 s->pc += 4;
2190 break;
2191 default:
2192 tcg_abort();
2193 }
2194 return ret;
2195 }
2196
2197 static inline int insn_const_size(TCGMemOp ot)
2198 {
2199 if (ot <= MO_32) {
2200 return 1 << ot;
2201 } else {
2202 return 4;
2203 }
2204 }
2205
2206 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2207 {
2208 TranslationBlock *tb;
2209 target_ulong pc;
2210
2211 pc = s->cs_base + eip;
2212 tb = s->tb;
2213 /* NOTE: we handle the case where the TB spans two pages here */
2214 if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2215 (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
2216 /* jump to same page: we can use a direct jump */
2217 tcg_gen_goto_tb(tb_num);
2218 gen_jmp_im(eip);
2219 tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2220 } else {
2221 /* jump to another page: currently not optimized */
2222 gen_jmp_im(eip);
2223 gen_eob(s);
2224 }
2225 }
2226
2227 static inline void gen_jcc(DisasContext *s, int b,
2228 target_ulong val, target_ulong next_eip)
2229 {
2230 int l1, l2;
2231
2232 if (s->jmp_opt) {
2233 l1 = gen_new_label();
2234 gen_jcc1(s, b, l1);
2235
2236 gen_goto_tb(s, 0, next_eip);
2237
2238 gen_set_label(l1);
2239 gen_goto_tb(s, 1, val);
2240 s->is_jmp = DISAS_TB_JUMP;
2241 } else {
2242 l1 = gen_new_label();
2243 l2 = gen_new_label();
2244 gen_jcc1(s, b, l1);
2245
2246 gen_jmp_im(next_eip);
2247 tcg_gen_br(l2);
2248
2249 gen_set_label(l1);
2250 gen_jmp_im(val);
2251 gen_set_label(l2);
2252 gen_eob(s);
2253 }
2254 }
2255
2256 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2257 int modrm, int reg)
2258 {
2259 CCPrepare cc;
2260
2261 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2262
2263 cc = gen_prepare_cc(s, b, cpu_T[1]);
2264 if (cc.mask != -1) {
2265 TCGv t0 = tcg_temp_new();
2266 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2267 cc.reg = t0;
2268 }
2269 if (!cc.use_reg2) {
2270 cc.reg2 = tcg_const_tl(cc.imm);
2271 }
2272
2273 tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
2274 cpu_T[0], cpu_regs[reg]);
2275 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2276
2277 if (cc.mask != -1) {
2278 tcg_temp_free(cc.reg);
2279 }
2280 if (!cc.use_reg2) {
2281 tcg_temp_free(cc.reg2);
2282 }
2283 }
2284
2285 static inline void gen_op_movl_T0_seg(int seg_reg)
2286 {
2287 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
2288 offsetof(CPUX86State,segs[seg_reg].selector));
2289 }
2290
2291 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2292 {
2293 tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
2294 tcg_gen_st32_tl(cpu_T[0], cpu_env,
2295 offsetof(CPUX86State,segs[seg_reg].selector));
2296 tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
2297 tcg_gen_st_tl(cpu_T[0], cpu_env,
2298 offsetof(CPUX86State,segs[seg_reg].base));
2299 }
2300
2301 /* move T0 to seg_reg and compute if the CPU state may change. Never
2302 call this function with seg_reg == R_CS */
2303 static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
2304 {
2305 if (s->pe && !s->vm86) {
2306 /* XXX: optimize by finding processor state dynamically */
2307 gen_update_cc_op(s);
2308 gen_jmp_im(cur_eip);
2309 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
2310 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2311 /* abort translation because the addseg value may change or
2312 because ss32 may change. For R_SS, translation must always
2313 stop as a special handling must be done to disable hardware
2314 interrupts for the next instruction */
2315 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2316 s->is_jmp = DISAS_TB_JUMP;
2317 } else {
2318 gen_op_movl_seg_T0_vm(seg_reg);
2319 if (seg_reg == R_SS)
2320 s->is_jmp = DISAS_TB_JUMP;
2321 }
2322 }
2323
2324 static inline int svm_is_rep(int prefixes)
2325 {
2326 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2327 }
2328
2329 static inline void
2330 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2331 uint32_t type, uint64_t param)
2332 {
2333 /* no SVM activated; fast case */
2334 if (likely(!(s->flags & HF_SVMI_MASK)))
2335 return;
2336 gen_update_cc_op(s);
2337 gen_jmp_im(pc_start - s->cs_base);
2338 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2339 tcg_const_i64(param));
2340 }
2341
2342 static inline void
2343 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2344 {
2345 gen_svm_check_intercept_param(s, pc_start, type, 0);
2346 }
2347
2348 static inline void gen_stack_update(DisasContext *s, int addend)
2349 {
2350 #ifdef TARGET_X86_64
2351 if (CODE64(s)) {
2352 gen_op_add_reg_im(MO_64, R_ESP, addend);
2353 } else
2354 #endif
2355 if (s->ss32) {
2356 gen_op_add_reg_im(MO_32, R_ESP, addend);
2357 } else {
2358 gen_op_add_reg_im(MO_16, R_ESP, addend);
2359 }
2360 }
2361
2362 /* Generate a push. It depends on ss32, addseg and dflag. */
2363 static void gen_push_v(DisasContext *s, TCGv val)
2364 {
2365 TCGMemOp a_ot, d_ot = mo_pushpop(s, s->dflag);
2366 int size = 1 << d_ot;
2367 TCGv new_esp = cpu_A0;
2368
2369 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2370
2371 if (CODE64(s)) {
2372 a_ot = MO_64;
2373 } else if (s->ss32) {
2374 a_ot = MO_32;
2375 if (s->addseg) {
2376 new_esp = cpu_tmp4;
2377 tcg_gen_mov_tl(new_esp, cpu_A0);
2378 gen_op_addl_A0_seg(s, R_SS);
2379 } else {
2380 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
2381 }
2382 } else {
2383 a_ot = MO_16;
2384 new_esp = cpu_tmp4;
2385 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2386 tcg_gen_mov_tl(new_esp, cpu_A0);
2387 gen_op_addl_A0_seg(s, R_SS);
2388 }
2389
2390 gen_op_st_v(s, d_ot, val, cpu_A0);
2391 gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2392 }
2393
2394 /* two step pop is necessary for precise exceptions */
2395 static TCGMemOp gen_pop_T0(DisasContext *s)
2396 {
2397 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2398 TCGv addr = cpu_A0;
2399
2400 if (CODE64(s)) {
2401 addr = cpu_regs[R_ESP];
2402 } else if (!s->ss32) {
2403 tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESP]);
2404 gen_op_addl_A0_seg(s, R_SS);
2405 } else if (s->addseg) {
2406 tcg_gen_mov_tl(cpu_A0, cpu_regs[R_ESP]);
2407 gen_op_addl_A0_seg(s, R_SS);
2408 } else {
2409 tcg_gen_ext32u_tl(cpu_A0, cpu_regs[R_ESP]);
2410 }
2411
2412 gen_op_ld_v(s, d_ot, cpu_T[0], addr);
2413 return d_ot;
2414 }
2415
2416 static void gen_pop_update(DisasContext *s, TCGMemOp ot)
2417 {
2418 gen_stack_update(s, 1 << ot);
2419 }
2420
2421 static void gen_stack_A0(DisasContext *s)
2422 {
2423 gen_op_movl_A0_reg(R_ESP);
2424 if (!s->ss32)
2425 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2426 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2427 if (s->addseg)
2428 gen_op_addl_A0_seg(s, R_SS);
2429 }
2430
2431 /* NOTE: wrap around in 16 bit not fully handled */
2432 static void gen_pusha(DisasContext *s)
2433 {
2434 int i;
2435 gen_op_movl_A0_reg(R_ESP);
2436 gen_op_addl_A0_im(-8 << s->dflag);
2437 if (!s->ss32)
2438 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2439 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2440 if (s->addseg)
2441 gen_op_addl_A0_seg(s, R_SS);
2442 for(i = 0;i < 8; i++) {
2443 gen_op_mov_v_reg(MO_32, cpu_T[0], 7 - i);
2444 gen_op_st_v(s, s->dflag, cpu_T[0], cpu_A0);
2445 gen_op_addl_A0_im(1 << s->dflag);
2446 }
2447 gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2448 }
2449
2450 /* NOTE: wrap around in 16 bit not fully handled */
2451 static void gen_popa(DisasContext *s)
2452 {
2453 int i;
2454 gen_op_movl_A0_reg(R_ESP);
2455 if (!s->ss32)
2456 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2457 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2458 tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 8 << s->dflag);
2459 if (s->addseg)
2460 gen_op_addl_A0_seg(s, R_SS);
2461 for(i = 0;i < 8; i++) {
2462 /* ESP is not reloaded */
2463 if (i != 3) {
2464 gen_op_ld_v(s, s->dflag, cpu_T[0], cpu_A0);
2465 gen_op_mov_reg_v(s->dflag, 7 - i, cpu_T[0]);
2466 }
2467 gen_op_addl_A0_im(1 << s->dflag);
2468 }
2469 gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2470 }
2471
2472 static void gen_enter(DisasContext *s, int esp_addend, int level)
2473 {
2474 TCGMemOp ot = mo_pushpop(s, s->dflag);
2475 int opsize = 1 << ot;
2476
2477 level &= 0x1f;
2478 #ifdef TARGET_X86_64
2479 if (CODE64(s)) {
2480 gen_op_movl_A0_reg(R_ESP);
2481 gen_op_addq_A0_im(-opsize);
2482 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2483
2484 /* push bp */
2485 gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2486 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2487 if (level) {
2488 /* XXX: must save state */
2489 gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
2490 tcg_const_i32((ot == MO_64)),
2491 cpu_T[1]);
2492 }
2493 gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2494 tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2495 gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[1]);
2496 } else
2497 #endif
2498 {
2499 gen_op_movl_A0_reg(R_ESP);
2500 gen_op_addl_A0_im(-opsize);
2501 if (!s->ss32)
2502 tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2503 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2504 if (s->addseg)
2505 gen_op_addl_A0_seg(s, R_SS);
2506 /* push bp */
2507 gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2508 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2509 if (level) {
2510 /* XXX: must save state */
2511 gen_helper_enter_level(cpu_env, tcg_const_i32(level),
2512 tcg_const_i32(s->dflag - 1),
2513 cpu_T[1]);
2514 }
2515 gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2516 tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2517 gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2518 }
2519 }
2520
2521 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2522 {
2523 gen_update_cc_op(s);
2524 gen_jmp_im(cur_eip);
2525 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2526 s->is_jmp = DISAS_TB_JUMP;
2527 }
2528
2529 /* an interrupt is different from an exception because of the
2530 privilege checks */
2531 static void gen_interrupt(DisasContext *s, int intno,
2532 target_ulong cur_eip, target_ulong next_eip)
2533 {
2534 gen_update_cc_op(s);
2535 gen_jmp_im(cur_eip);
2536 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2537 tcg_const_i32(next_eip - cur_eip));
2538 s->is_jmp = DISAS_TB_JUMP;
2539 }
2540
2541 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2542 {
2543 gen_update_cc_op(s);
2544 gen_jmp_im(cur_eip);
2545 gen_helper_debug(cpu_env);
2546 s->is_jmp = DISAS_TB_JUMP;
2547 }
2548
2549 /* generate a generic end of block. Trace exception is also generated
2550 if needed */
2551 static void gen_eob(DisasContext *s)
2552 {
2553 gen_update_cc_op(s);
2554 if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
2555 gen_helper_reset_inhibit_irq(cpu_env);
2556 }
2557 if (s->tb->flags & HF_RF_MASK) {
2558 gen_helper_reset_rf(cpu_env);
2559 }
2560 if (s->singlestep_enabled) {
2561 gen_helper_debug(cpu_env);
2562 } else if (s->tf) {
2563 gen_helper_single_step(cpu_env);
2564 } else {
2565 tcg_gen_exit_tb(0);
2566 }
2567 s->is_jmp = DISAS_TB_JUMP;
2568 }
2569
2570 /* generate a jump to eip. No segment change must happen before as a
2571 direct call to the next block may occur */
2572 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2573 {
2574 gen_update_cc_op(s);
2575 set_cc_op(s, CC_OP_DYNAMIC);
2576 if (s->jmp_opt) {
2577 gen_goto_tb(s, tb_num, eip);
2578 s->is_jmp = DISAS_TB_JUMP;
2579 } else {
2580 gen_jmp_im(eip);
2581 gen_eob(s);
2582 }
2583 }
2584
2585 static void gen_jmp(DisasContext *s, target_ulong eip)
2586 {
2587 gen_jmp_tb(s, eip, 0);
2588 }
2589
2590 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2591 {
2592 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2593 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2594 }
2595
2596 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2597 {
2598 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2599 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2600 }
2601
2602 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2603 {
2604 int mem_index = s->mem_index;
2605 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2606 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2607 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2608 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2609 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2610 }
2611
2612 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2613 {
2614 int mem_index = s->mem_index;
2615 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2616 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2617 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2618 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2619 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2620 }
2621
2622 static inline void gen_op_movo(int d_offset, int s_offset)
2623 {
2624 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2625 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2626 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
2627 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
2628 }
2629
2630 static inline void gen_op_movq(int d_offset, int s_offset)
2631 {
2632 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2633 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2634 }
2635
2636 static inline void gen_op_movl(int d_offset, int s_offset)
2637 {
2638 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2639 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2640 }
2641
2642 static inline void gen_op_movq_env_0(int d_offset)
2643 {
2644 tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2645 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2646 }
2647
2648 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2649 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2650 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2651 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2652 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2653 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2654 TCGv_i32 val);
2655 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2656 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2657 TCGv val);
2658
/* Non-NULL sentinel table entries.  SSE_SPECIAL marks opcodes that
   gen_sse decodes in its own switch; SSE_DUMMY is used for the femms,
   emms and 3DNow! ("pf...") entries. */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* { MMX helper, XMM helper } pair for operation x */
#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
/* { ps, pd, ss, sd } helper quadruple for floating-point operation x */
#define SSE_FOP(x) { gen_helper_ ## x ## ps, \
                     gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, \
                     gen_helper_ ## x ## sd, }
2665
2666 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2667 /* 3DNow! extensions */
2668 [0x0e] = { SSE_DUMMY }, /* femms */
2669 [0x0f] = { SSE_DUMMY }, /* pf... */
2670 /* pure SSE operations */
2671 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2672 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2673 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2674 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2675 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2676 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2677 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2678 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2679
2680 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2681 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2682 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2683 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2684 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2685 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2686 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2687 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2688 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2689 [0x51] = SSE_FOP(sqrt),
2690 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2691 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2692 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2693 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2694 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2695 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2696 [0x58] = SSE_FOP(add),
2697 [0x59] = SSE_FOP(mul),
2698 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2699 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2700 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2701 [0x5c] = SSE_FOP(sub),
2702 [0x5d] = SSE_FOP(min),
2703 [0x5e] = SSE_FOP(div),
2704 [0x5f] = SSE_FOP(max),
2705
2706 [0xc2] = SSE_FOP(cmpeq),
2707 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2708 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2709
2710 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2711 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2712 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2713
2714 /* MMX ops and their SSE extensions */
2715 [0x60] = MMX_OP2(punpcklbw),
2716 [0x61] = MMX_OP2(punpcklwd),
2717 [0x62] = MMX_OP2(punpckldq),
2718 [0x63] = MMX_OP2(packsswb),
2719 [0x64] = MMX_OP2(pcmpgtb),
2720 [0x65] = MMX_OP2(pcmpgtw),
2721 [0x66] = MMX_OP2(pcmpgtl),
2722 [0x67] = MMX_OP2(packuswb),
2723 [0x68] = MMX_OP2(punpckhbw),
2724 [0x69] = MMX_OP2(punpckhwd),
2725 [0x6a] = MMX_OP2(punpckhdq),
2726 [0x6b] = MMX_OP2(packssdw),
2727 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2728 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2729 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2730 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
2731 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2732 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2733 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2734 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2735 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2736 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2737 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2738 [0x74] = MMX_OP2(pcmpeqb),
2739 [0x75] = MMX_OP2(pcmpeqw),
2740 [0x76] = MMX_OP2(pcmpeql),
2741 [0x77] = { SSE_DUMMY }, /* emms */
2742 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2743 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2744 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2745 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2746 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2747 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2748 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2749 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2750 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2751 [0xd1] = MMX_OP2(psrlw),
2752 [0xd2] = MMX_OP2(psrld),
2753 [0xd3] = MMX_OP2(psrlq),
2754 [0xd4] = MMX_OP2(paddq),
2755 [0xd5] = MMX_OP2(pmullw),
2756 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2757 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2758 [0xd8] = MMX_OP2(psubusb),
2759 [0xd9] = MMX_OP2(psubusw),
2760 [0xda] = MMX_OP2(pminub),
2761 [0xdb] = MMX_OP2(pand),
2762 [0xdc] = MMX_OP2(paddusb),
2763 [0xdd] = MMX_OP2(paddusw),
2764 [0xde] = MMX_OP2(pmaxub),
2765 [0xdf] = MMX_OP2(pandn),
2766 [0xe0] = MMX_OP2(pavgb),
2767 [0xe1] = MMX_OP2(psraw),
2768 [0xe2] = MMX_OP2(psrad),
2769 [0xe3] = MMX_OP2(pavgw),
2770 [0xe4] = MMX_OP2(pmulhuw),
2771 [0xe5] = MMX_OP2(pmulhw),
2772 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2773 [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */
2774 [0xe8] = MMX_OP2(psubsb),
2775 [0xe9] = MMX_OP2(psubsw),
2776 [0xea] = MMX_OP2(pminsw),
2777 [0xeb] = MMX_OP2(por),
2778 [0xec] = MMX_OP2(paddsb),
2779 [0xed] = MMX_OP2(paddsw),
2780 [0xee] = MMX_OP2(pmaxsw),
2781 [0xef] = MMX_OP2(pxor),
2782 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2783 [0xf1] = MMX_OP2(psllw),
2784 [0xf2] = MMX_OP2(pslld),
2785 [0xf3] = MMX_OP2(psllq),
2786 [0xf4] = MMX_OP2(pmuludq),
2787 [0xf5] = MMX_OP2(pmaddwd),
2788 [0xf6] = MMX_OP2(psadbw),
2789 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2790 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2791 [0xf8] = MMX_OP2(psubb),
2792 [0xf9] = MMX_OP2(psubw),
2793 [0xfa] = MMX_OP2(psubl),
2794 [0xfb] = MMX_OP2(psubq),
2795 [0xfc] = MMX_OP2(paddb),
2796 [0xfd] = MMX_OP2(paddw),
2797 [0xfe] = MMX_OP2(paddl),
2798 };
2799
2800 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2801 [0 + 2] = MMX_OP2(psrlw),
2802 [0 + 4] = MMX_OP2(psraw),
2803 [0 + 6] = MMX_OP2(psllw),
2804 [8 + 2] = MMX_OP2(psrld),
2805 [8 + 4] = MMX_OP2(psrad),
2806 [8 + 6] = MMX_OP2(pslld),
2807 [16 + 2] = MMX_OP2(psrlq),
2808 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2809 [16 + 6] = MMX_OP2(psllq),
2810 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2811 };
2812
2813 static const SSEFunc_0_epi sse_op_table3ai[] = {
2814 gen_helper_cvtsi2ss,
2815 gen_helper_cvtsi2sd
2816 };
2817
#ifdef TARGET_X86_64
/* 64-bit integer to scalar float conversions: [0] single, [1] double. */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    [0] = gen_helper_cvtsq2ss,
    [1] = gen_helper_cvtsq2sd,
};
#endif
2824
2825 static const SSEFunc_i_ep sse_op_table3bi[] = {
2826 gen_helper_cvttss2si,
2827 gen_helper_cvtss2si,
2828 gen_helper_cvttsd2si,
2829 gen_helper_cvtsd2si
2830 };
2831
#ifdef TARGET_X86_64
/* Scalar float to 64-bit integer conversions, laid out like
   sse_op_table3bi. */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    [0] = gen_helper_cvttss2sq,
    [1] = gen_helper_cvtss2sq,
    [2] = gen_helper_cvttsd2sq,
    [3] = gen_helper_cvtsd2sq,
};
#endif
2840
2841 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2842 SSE_FOP(cmpeq),
2843 SSE_FOP(cmplt),
2844 SSE_FOP(cmple),
2845 SSE_FOP(cmpunord),
2846 SSE_FOP(cmpneq),
2847 SSE_FOP(cmpnlt),
2848 SSE_FOP(cmpnle),
2849 SSE_FOP(cmpord),
2850 };
2851
2852 static const SSEFunc_0_epp sse_op_table5[256] = {
2853 [0x0c] = gen_helper_pi2fw,
2854 [0x0d] = gen_helper_pi2fd,
2855 [0x1c] = gen_helper_pf2iw,
2856 [0x1d] = gen_helper_pf2id,
2857 [0x8a] = gen_helper_pfnacc,
2858 [0x8e] = gen_helper_pfpnacc,
2859 [0x90] = gen_helper_pfcmpge,
2860 [0x94] = gen_helper_pfmin,
2861 [0x96] = gen_helper_pfrcp,
2862 [0x97] = gen_helper_pfrsqrt,
2863 [0x9a] = gen_helper_pfsub,
2864 [0x9e] = gen_helper_pfadd,
2865 [0xa0] = gen_helper_pfcmpgt,
2866 [0xa4] = gen_helper_pfmax,
2867 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2868 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2869 [0xaa] = gen_helper_pfsubr,
2870 [0xae] = gen_helper_pfacc,
2871 [0xb0] = gen_helper_pfcmpeq,
2872 [0xb4] = gen_helper_pfmul,
2873 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2874 [0xb7] = gen_helper_pmulhrw_mmx,
2875 [0xbb] = gen_helper_pswapd,
2876 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2877 };
2878
2879 struct SSEOpHelper_epp {
2880 SSEFunc_0_epp op[2];
2881 uint32_t ext_mask;
2882 };
2883
2884 struct SSEOpHelper_eppi {
2885 SSEFunc_0_eppi op[2];
2886 uint32_t ext_mask;
2887 };
2888
/*
 * Initializers for SSEOpHelper_epp / SSEOpHelper_eppi entries: a
 * { MMX, XMM } helper pair followed by the CPUID_EXT_* feature bit.
 * Only SSSE3_OP provides an MMX helper; the others leave that slot NULL.
 */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2896
2897 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2898 [0x00] = SSSE3_OP(pshufb),
2899 [0x01] = SSSE3_OP(phaddw),
2900 [0x02] = SSSE3_OP(phaddd),
2901 [0x03] = SSSE3_OP(phaddsw),
2902 [0x04] = SSSE3_OP(pmaddubsw),
2903 [0x05] = SSSE3_OP(phsubw),
2904 [0x06] = SSSE3_OP(phsubd),
2905 [0x07] = SSSE3_OP(phsubsw),
2906 [0x08] = SSSE3_OP(psignb),
2907 [0x09] = SSSE3_OP(psignw),
2908 [0x0a] = SSSE3_OP(psignd),
2909 [0x0b] = SSSE3_OP(pmulhrsw),
2910 [0x10] = SSE41_OP(pblendvb),
2911 [0x14] = SSE41_OP(blendvps),
2912 [0x15] = SSE41_OP(blendvpd),
2913 [0x17] = SSE41_OP(ptest),
2914 [0x1c] = SSSE3_OP(pabsb),
2915 [0x1d] = SSSE3_OP(pabsw),
2916 [0x1e] = SSSE3_OP(pabsd),
2917 [0x20] = SSE41_OP(pmovsxbw),
2918 [0x21] = SSE41_OP(pmovsxbd),
2919 [0x22] = SSE41_OP(pmovsxbq),
2920 [0x23] = SSE41_OP(pmovsxwd),
2921 [0x24] = SSE41_OP(pmovsxwq),
2922 [0x25] = SSE41_OP(pmovsxdq),
2923 [0x28] = SSE41_OP(pmuldq),
2924 [0x29] = SSE41_OP(pcmpeqq),
2925 [0x2a] = SSE41_SPECIAL, /* movntqda */
2926 [0x2b] = SSE41_OP(packusdw),
2927 [0x30] = SSE41_OP(pmovzxbw),
2928 [0x31] = SSE41_OP(pmovzxbd),
2929 [0x32] = SSE41_OP(pmovzxbq),
2930 [0x33] = SSE41_OP(pmovzxwd),
2931 [0x34] = SSE41_OP(pmovzxwq),
2932 [0x35] = SSE41_OP(pmovzxdq),
2933 [0x37] = SSE42_OP(pcmpgtq),
2934 [0x38] = SSE41_OP(pminsb),
2935 [0x39] = SSE41_OP(pminsd),
2936 [0x3a] = SSE41_OP(pminuw),
2937 [0x3b] = SSE41_OP(pminud),
2938 [0x3c] = SSE41_OP(pmaxsb),
2939 [0x3d] = SSE41_OP(pmaxsd),
2940 [0x3e] = SSE41_OP(pmaxuw),
2941 [0x3f] = SSE41_OP(pmaxud),
2942 [0x40] = SSE41_OP(pmulld),
2943 [0x41] = SSE41_OP(phminposuw),
2944 [0xdb] = AESNI_OP(aesimc),
2945 [0xdc] = AESNI_OP(aesenc),
2946 [0xdd] = AESNI_OP(aesenclast),
2947 [0xde] = AESNI_OP(aesdec),
2948 [0xdf] = AESNI_OP(aesdeclast),
2949 };
2950
2951 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2952 [0x08] = SSE41_OP(roundps),
2953 [0x09] = SSE41_OP(roundpd),
2954 [0x0a] = SSE41_OP(roundss),
2955 [0x0b] = SSE41_OP(roundsd),
2956 [0x0c] = SSE41_OP(blendps),
2957 [0x0d] = SSE41_OP(blendpd),
2958 [0x0e] = SSE41_OP(pblendw),
2959 [0x0f] = SSSE3_OP(palignr),
2960 [0x14] = SSE41_SPECIAL, /* pextrb */
2961 [0x15] = SSE41_SPECIAL, /* pextrw */
2962 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2963 [0x17] = SSE41_SPECIAL, /* extractps */
2964 [0x20] = SSE41_SPECIAL, /* pinsrb */
2965 [0x21] = SSE41_SPECIAL, /* insertps */
2966 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2967 [0x40] = SSE41_OP(dpps),
2968 [0x41] = SSE41_OP(dppd),
2969 [0x42] = SSE41_OP(mpsadbw),
2970 [0x44] = PCLMULQDQ_OP(pclmulqdq),
2971 [0x60] = SSE42_OP(pcmpestrm),
2972 [0x61] = SSE42_OP(pcmpestri),
2973 [0x62] = SSE42_OP(pcmpistrm),
2974 [0x63] = SSE42_OP(pcmpistri),
2975 [0xdf] = AESNI_OP(aeskeygenassist),
2976 };
2977
2978 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2979 target_ulong pc_start, int rex_r)
2980 {
2981 int b1, op1_offset, op2_offset, is_xmm, val;
2982 int modrm, mod, rm, reg;
2983 SSEFunc_0_epp sse_fn_epp;
2984 SSEFunc_0_eppi sse_fn_eppi;
2985 SSEFunc_0_ppi sse_fn_ppi;
2986 SSEFunc_0_eppt sse_fn_eppt;
2987 TCGMemOp ot;
2988
2989 b &= 0xff;
2990 if (s->prefix & PREFIX_DATA)
2991 b1 = 1;
2992 else if (s->prefix & PREFIX_REPZ)
2993 b1 = 2;
2994 else if (s->prefix & PREFIX_REPNZ)
2995 b1 = 3;
2996 else
2997 b1 = 0;
2998 sse_fn_epp = sse_op_table1[b][b1];
2999 if (!sse_fn_epp) {
3000 goto illegal_op;
3001 }
3002 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3003 is_xmm = 1;
3004 } else {
3005 if (b1 == 0) {
3006 /* MMX case */
3007 is_xmm = 0;
3008 } else {
3009 is_xmm = 1;
3010 }
3011 }
3012 /* simple MMX/SSE operation */
3013 if (s->flags & HF_TS_MASK) {
3014 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3015 return;
3016 }
3017 if (s->flags & HF_EM_MASK) {
3018 illegal_op:
3019 gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
3020 return;
3021 }
3022 if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
3023 if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
3024 goto illegal_op;
3025 if (b == 0x0e) {
3026 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3027 goto illegal_op;
3028 /* femms */
3029 gen_helper_emms(cpu_env);
3030 return;
3031 }
3032 if (b == 0x77) {
3033 /* emms */
3034 gen_helper_emms(cpu_env);
3035 return;
3036 }
3037 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3038 the static cpu state) */
3039 if (!is_xmm) {
3040 gen_helper_enter_mmx(cpu_env);
3041 }
3042
3043 modrm = cpu_ldub_code(env, s->pc++);
3044 reg = ((modrm >> 3) & 7);
3045 if (is_xmm)
3046 reg |= rex_r;
3047 mod = (modrm >> 6) & 3;
3048 if (sse_fn_epp == SSE_SPECIAL) {
3049 b |= (b1 << 8);
3050 switch(b) {
3051 case 0x0e7: /* movntq */
3052 if (mod == 3)
3053 goto illegal_op;
3054 gen_lea_modrm(env, s, modrm);
3055 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3056 break;
3057 case 0x1e7: /* movntdq */
3058 case 0x02b: /* movntps */
3059 case 0x12b: /* movntps */
3060 if (mod == 3)
3061 goto illegal_op;
3062 gen_lea_modrm(env, s, modrm);
3063 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3064 break;
3065 case 0x3f0: /* lddqu */
3066 if (mod == 3)
3067 goto illegal_op;
3068 gen_lea_modrm(env, s, modrm);
3069 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3070 break;
3071 case 0x22b: /* movntss */
3072 case 0x32b: /* movntsd */
3073 if (mod == 3)
3074 goto illegal_op;
3075 gen_lea_modrm(env, s, modrm);
3076 if (b1 & 1) {
3077 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3078 } else {
3079 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3080 xmm_regs[reg].XMM_L(0)));
3081 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
3082 }
3083 break;
3084 case 0x6e: /* movd mm, ea */
3085 #ifdef TARGET_X86_64
3086 if (s->dflag == MO_64) {
3087 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3088 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3089 } else
3090 #endif
3091 {
3092 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3093 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3094 offsetof(CPUX86State,fpregs[reg].mmx));
3095 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3096 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3097 }
3098 break;
3099 case 0x16e: /* movd xmm, ea */
3100 #ifdef TARGET_X86_64
3101 if (s->dflag == MO_64) {
3102 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3103 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3104 offsetof(CPUX86State,xmm_regs[reg]));
3105 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
3106 } else
3107 #endif
3108 {
3109 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3110 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3111 offsetof(CPUX86State,xmm_regs[reg]));
3112 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3113 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3114 }
3115 break;
3116 case 0x6f: /* movq mm, ea */
3117 if (mod != 3) {
3118 gen_lea_modrm(env, s, modrm);
3119 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3120 } else {
3121 rm = (modrm & 7);
3122 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3123 offsetof(CPUX86State,fpregs[rm].mmx));
3124 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3125 offsetof(CPUX86State,fpregs[reg].mmx));
3126 }
3127 break;
3128 case 0x010: /* movups */
3129 case 0x110: /* movupd */
3130 case 0x028: /* movaps */
3131 case 0x128: /* movapd */
3132 case 0x16f: /* movdqa xmm, ea */
3133 case 0x26f: /* movdqu xmm, ea */
3134 if (mod != 3) {
3135 gen_lea_modrm(env, s, modrm);
3136 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3137 } else {
3138 rm = (modrm & 7) | REX_B(s);
3139 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3140 offsetof(CPUX86State,xmm_regs[rm]));
3141 }
3142 break;
3143 case 0x210: /* movss xmm, ea */
3144 if (mod != 3) {
3145 gen_lea_modrm(env, s, modrm);
3146 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
3147 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3148 tcg_gen_movi_tl(cpu_T[0], 0);
3149 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3150 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3151 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3152 } else {
3153 rm = (modrm & 7) | REX_B(s);
3154 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3155