1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg-op.h"
26 #include "exec/cpu_ldst.h"
27
28 #include "exec/helper-proto.h"
29 #include "exec/helper-gen.h"
30
31 #include "trace-tcg.h"
32 #include "exec/log.h"
33
34
35 #define PREFIX_REPZ 0x01
36 #define PREFIX_REPNZ 0x02
37 #define PREFIX_LOCK 0x04
38 #define PREFIX_DATA 0x08
39 #define PREFIX_ADR 0x10
40 #define PREFIX_VEX 0x20
41
42 #ifdef TARGET_X86_64
43 #define CODE64(s) ((s)->code64)
44 #define REX_X(s) ((s)->rex_x)
45 #define REX_B(s) ((s)->rex_b)
46 #else
47 #define CODE64(s) 0
48 #define REX_X(s) 0
49 #define REX_B(s) 0
50 #endif
51
52 #ifdef TARGET_X86_64
53 # define ctztl ctz64
54 # define clztl clz64
55 #else
56 # define ctztl ctz32
57 # define clztl clz32
58 #endif
59
60 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
61 #define CASE_MODRM_MEM_OP(OP) \
62 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
63 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
64 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
65
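/* As above, but additionally match register operands (mod == 3). */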
66 #define CASE_MODRM_OP(OP) \
67 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
68 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
69 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
70 case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
71
72 //#define MACRO_TEST 1
73
74 /* global register indexes */
75 static TCGv_env cpu_env;
76 static TCGv cpu_A0;
77 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
78 static TCGv_i32 cpu_cc_op;
79 static TCGv cpu_regs[CPU_NB_REGS];
80 static TCGv cpu_seg_base[6];
81 static TCGv_i64 cpu_bndl[4];
82 static TCGv_i64 cpu_bndu[4];
83 /* local temps */
84 static TCGv cpu_T0, cpu_T1;
85 /* local register indexes (only used inside old micro ops) */
86 static TCGv cpu_tmp0, cpu_tmp4;
87 static TCGv_ptr cpu_ptr0, cpu_ptr1;
88 static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
89 static TCGv_i64 cpu_tmp1_i64;
90
91 #include "exec/gen-icount.h"
92
93 #ifdef TARGET_X86_64
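/* Non-zero when a REX prefix has been seen; byte register numbers 4-7 then
   select SPL/BPL/SIL/DIL rather than AH/CH/DH/BH (see byte_reg_is_xH). */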
94 static int x86_64_hregs;
95 #endif
96
97 typedef struct DisasContext {
98 /* current insn context */
99 int override; /* -1 if no override */
100 int prefix;
101 TCGMemOp aflag;
102 TCGMemOp dflag;
103 target_ulong pc_start;
104 target_ulong pc; /* pc = eip + cs_base */
105 int is_jmp; /* 1 means jump (stop translation), 2 means CPU
106                static state change (stop translation) */
107 /* current block context */
108 target_ulong cs_base; /* base of CS segment */
109 int pe; /* protected mode */
110 int code32; /* 32 bit code segment */
111 #ifdef TARGET_X86_64
112 int lma; /* long mode active */
113 int code64; /* 64 bit code segment */
114 int rex_x, rex_b;
115 #endif
116 int vex_l; /* vex vector length */
117 int vex_v; /* vex vvvv register, without 1's complement. */
118 int ss32; /* 32 bit stack segment */
119 CCOp cc_op; /* current CC operation */
120 bool cc_op_dirty;
121 int addseg; /* non-zero if any of DS, ES or SS has a non-zero base */
122 int f_st; /* currently unused */
123 int vm86; /* vm86 mode */
124 int cpl;
125 int iopl;
126 int tf; /* TF cpu flag */
127 int singlestep_enabled; /* "hardware" single step enabled */
128 int jmp_opt; /* use direct block chaining for direct jumps */
129 int repz_opt; /* optimize jumps within repz instructions */
130 int mem_index; /* select memory access functions */
131 uint64_t flags; /* all execution flags */
132 struct TranslationBlock *tb;
133 int popl_esp_hack; /* for correct popl with esp base handling */
134 int rip_offset; /* only used in x86_64, but left for simplicity */
135 int cpuid_features;
136 int cpuid_ext_features;
137 int cpuid_ext2_features;
138 int cpuid_ext3_features;
139 int cpuid_7_0_ebx_features;
140 int cpuid_xsave_features;
141 } DisasContext;
142
143 static void gen_eob(DisasContext *s);
144 static void gen_jmp(DisasContext *s, target_ulong eip);
145 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
146 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);
147
148 /* i386 arith/logic operations */
149 enum {
150 OP_ADDL,
151 OP_ORL,
152 OP_ADCL,
153 OP_SBBL,
154 OP_ANDL,
155 OP_SUBL,
156 OP_XORL,
157 OP_CMPL,
158 };
159
160 /* i386 shift ops */
161 enum {
162 OP_ROL,
163 OP_ROR,
164 OP_RCL,
165 OP_RCR,
166 OP_SHL,
167 OP_SHR,
168 OP_SHL1, /* undocumented */
169 OP_SAR = 7,
170 };
171
172 enum {
173 JCC_O,
174 JCC_B,
175 JCC_Z,
176 JCC_BE,
177 JCC_S,
178 JCC_P,
179 JCC_L,
180 JCC_LE,
181 };
182
183 enum {
184 /* I386 int registers */
185 OR_EAX, /* MUST be even numbered */
186 OR_ECX,
187 OR_EDX,
188 OR_EBX,
189 OR_ESP,
190 OR_EBP,
191 OR_ESI,
192 OR_EDI,
193
194 OR_TMP0 = 16, /* temporary operand register */
195 OR_TMP1,
196 OR_A0, /* temporary register used when doing address evaluation */
197 };
198
199 enum {
200 USES_CC_DST = 1,
201 USES_CC_SRC = 2,
202 USES_CC_SRC2 = 4,
203 USES_CC_SRCT = 8,
204 };
205
206 /* Bit set if the global variable is live after setting CC_OP to X. */
207 static const uint8_t cc_op_live[CC_OP_NB] = {
208 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
209 [CC_OP_EFLAGS] = USES_CC_SRC,
210 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
211 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
212 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
213 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
214 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
215 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
216 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
217 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
218 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
219 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
220 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
221 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
222 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
223 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
224 [CC_OP_CLR] = 0,
225 };
226
227 static void set_cc_op(DisasContext *s, CCOp op)
228 {
229 int dead;
230
231 if (s->cc_op == op) {
232 return;
233 }
234
235 /* Discard CC computation that will no longer be used. */
236 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
237 if (dead & USES_CC_DST) {
238 tcg_gen_discard_tl(cpu_cc_dst);
239 }
240 if (dead & USES_CC_SRC) {
241 tcg_gen_discard_tl(cpu_cc_src);
242 }
243 if (dead & USES_CC_SRC2) {
244 tcg_gen_discard_tl(cpu_cc_src2);
245 }
246 if (dead & USES_CC_SRCT) {
247 tcg_gen_discard_tl(cpu_cc_srcT);
248 }
249
250 if (op == CC_OP_DYNAMIC) {
251 /* The DYNAMIC setting is translator only, and should never be
252 stored. Thus we always consider it clean. */
253 s->cc_op_dirty = false;
254 } else {
255 /* Discard any computed CC_OP value (see shifts). */
256 if (s->cc_op == CC_OP_DYNAMIC) {
257 tcg_gen_discard_i32(cpu_cc_op);
258 }
259 s->cc_op_dirty = true;
260 }
261 s->cc_op = op;
262 }
263
264 static void gen_update_cc_op(DisasContext *s)
265 {
266 if (s->cc_op_dirty) {
267 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
268 s->cc_op_dirty = false;
269 }
270 }
271
272 #ifdef TARGET_X86_64
273
274 #define NB_OP_SIZES 4
275
276 #else /* !TARGET_X86_64 */
277
278 #define NB_OP_SIZES 3
279
280 #endif /* !TARGET_X86_64 */
281
282 #if defined(HOST_WORDS_BIGENDIAN)
283 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
284 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
285 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
286 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
287 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
288 #else
289 #define REG_B_OFFSET 0
290 #define REG_H_OFFSET 1
291 #define REG_W_OFFSET 0
292 #define REG_L_OFFSET 0
293 #define REG_LH_OFFSET 4
294 #endif
295
296 /* In instruction encodings for byte register accesses the
297 * register number usually indicates "low 8 bits of register N";
298 * however there are some special cases where N 4..7 indicates
299 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
300 * true for this special case, false otherwise.
301 */
302 static inline bool byte_reg_is_xH(int reg)
303 {
304 if (reg < 4) {
305 return false;
306 }
307 #ifdef TARGET_X86_64
308 if (reg >= 8 || x86_64_hregs) {
309 return false;
310 }
311 #endif
312 return true;
313 }
314
315 /* Select the size of a push/pop operation. */
316 static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
317 {
318 if (CODE64(s)) {
319 return ot == MO_16 ? MO_16 : MO_64;
320 } else {
321 return ot;
322 }
323 }
324
325 /* Select the size of the stack pointer. */
326 static inline TCGMemOp mo_stacksize(DisasContext *s)
327 {
328 return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
329 }
330
331 /* Select size 64 if possible, else 32. Used for SSE operand sizes. */
332 static inline TCGMemOp mo_64_32(TCGMemOp ot)
333 {
334 #ifdef TARGET_X86_64
335 return ot == MO_64 ? MO_64 : MO_32;
336 #else
337 return MO_32;
338 #endif
339 }
340
341 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
342 byte vs word opcodes. */
343 static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
344 {
345 return b & 1 ? ot : MO_8;
346 }
347
348 /* Select size 8 if lsb of B is clear, else OT capped at 32.
349 Used for decoding operand size of port opcodes. */
350 static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
351 {
352 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
353 }
354
355 static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
356 {
357 switch (ot) {
358 case MO_8:
359 if (!byte_reg_is_xH(reg)) {
360 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
361 } else {
362 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
363 }
364 break;
365 case MO_16:
366 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
367 break;
368 case MO_32:
369 /* For x86_64, this sets the higher half of the register to zero.
370 For i386, this is equivalent to a mov. */
371 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
372 break;
373 #ifdef TARGET_X86_64
374 case MO_64:
375 tcg_gen_mov_tl(cpu_regs[reg], t0);
376 break;
377 #endif
378 default:
379 tcg_abort();
380 }
381 }
382
383 static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
384 {
385 if (ot == MO_8 && byte_reg_is_xH(reg)) {
386 tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
387 tcg_gen_ext8u_tl(t0, t0);
388 } else {
389 tcg_gen_mov_tl(t0, cpu_regs[reg]);
390 }
391 }
392
393 static void gen_add_A0_im(DisasContext *s, int val)
394 {
395 tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
396 if (!CODE64(s)) {
397 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
398 }
399 }
400
401 static inline void gen_op_jmp_v(TCGv dest)
402 {
403 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
404 }
405
406 static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
407 {
408 tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
409 gen_op_mov_reg_v(size, reg, cpu_tmp0);
410 }
411
412 static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
413 {
414 tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
415 gen_op_mov_reg_v(size, reg, cpu_tmp0);
416 }
417
418 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
419 {
420 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
421 }
422
423 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
424 {
425 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
426 }
427
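/* Store T0 to the destination operand: memory at A0 if d == OR_TMP0,
   otherwise general register d. */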
428 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
429 {
430 if (d == OR_TMP0) {
431 gen_op_st_v(s, idx, cpu_T0, cpu_A0);
432 } else {
433 gen_op_mov_reg_v(idx, d, cpu_T0);
434 }
435 }
436
437 static inline void gen_jmp_im(target_ulong pc)
438 {
439 tcg_gen_movi_tl(cpu_tmp0, pc);
440 gen_op_jmp_v(cpu_tmp0);
441 }
442
443 /* Compute SEG:REG into A0. SEG is selected from the override segment
444 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
445 indicate no override. */
446 static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
447 int def_seg, int ovr_seg)
448 {
449 switch (aflag) {
450 #ifdef TARGET_X86_64
451 case MO_64:
452 if (ovr_seg < 0) {
453 tcg_gen_mov_tl(cpu_A0, a0);
454 return;
455 }
456 break;
457 #endif
458 case MO_32:
459 /* 32 bit address */
460 if (ovr_seg < 0) {
461 if (s->addseg) {
462 ovr_seg = def_seg;
463 } else {
464 tcg_gen_ext32u_tl(cpu_A0, a0);
465 return;
466 }
467 }
468 break;
469 case MO_16:
470 /* 16 bit address */
471 tcg_gen_ext16u_tl(cpu_A0, a0);
472 a0 = cpu_A0;
473 if (ovr_seg < 0) {
474 if (s->addseg) {
475 ovr_seg = def_seg;
476 } else {
477 return;
478 }
479 }
480 break;
481 default:
482 tcg_abort();
483 }
484
485 if (ovr_seg >= 0) {
486 TCGv seg = cpu_seg_base[ovr_seg];
487
488 if (aflag == MO_64) {
489 tcg_gen_add_tl(cpu_A0, a0, seg);
490 } else if (CODE64(s)) {
491 tcg_gen_ext32u_tl(cpu_A0, a0);
492 tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
493 } else {
494 tcg_gen_add_tl(cpu_A0, a0, seg);
495 tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
496 }
497 }
498 }
499
500 static inline void gen_string_movl_A0_ESI(DisasContext *s)
501 {
502 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
503 }
504
505 static inline void gen_string_movl_A0_EDI(DisasContext *s)
506 {
507 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
508 }
509
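/* Load the direction-flag-scaled element size, +/-(1 << ot), into T0 so that
   string instructions can advance ESI/EDI by it. */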
510 static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
511 {
512 tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
513 tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
514 }
515
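/* Extend SRC according to SIZE and SIGN, writing the result into DST.
   Returns DST, or SRC itself when no extension is needed. */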
516 static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
517 {
518 switch (size) {
519 case MO_8:
520 if (sign) {
521 tcg_gen_ext8s_tl(dst, src);
522 } else {
523 tcg_gen_ext8u_tl(dst, src);
524 }
525 return dst;
526 case MO_16:
527 if (sign) {
528 tcg_gen_ext16s_tl(dst, src);
529 } else {
530 tcg_gen_ext16u_tl(dst, src);
531 }
532 return dst;
533 #ifdef TARGET_X86_64
534 case MO_32:
535 if (sign) {
536 tcg_gen_ext32s_tl(dst, src);
537 } else {
538 tcg_gen_ext32u_tl(dst, src);
539 }
540 return dst;
541 #endif
542 default:
543 return src;
544 }
545 }
546
547 static void gen_extu(TCGMemOp ot, TCGv reg)
548 {
549 gen_ext_tl(reg, reg, ot, false);
550 }
551
552 static void gen_exts(TCGMemOp ot, TCGv reg)
553 {
554 gen_ext_tl(reg, reg, ot, true);
555 }
556
557 static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
558 {
559 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
560 gen_extu(size, cpu_tmp0);
561 tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
562 }
563
564 static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
565 {
566 tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
567 gen_extu(size, cpu_tmp0);
568 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
569 }
570
571 static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
572 {
573 switch (ot) {
574 case MO_8:
575 gen_helper_inb(v, cpu_env, n);
576 break;
577 case MO_16:
578 gen_helper_inw(v, cpu_env, n);
579 break;
580 case MO_32:
581 gen_helper_inl(v, cpu_env, n);
582 break;
583 default:
584 tcg_abort();
585 }
586 }
587
588 static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
589 {
590 switch (ot) {
591 case MO_8:
592 gen_helper_outb(cpu_env, v, n);
593 break;
594 case MO_16:
595 gen_helper_outw(cpu_env, v, n);
596 break;
597 case MO_32:
598 gen_helper_outl(cpu_env, v, n);
599 break;
600 default:
601 tcg_abort();
602 }
603 }
604
605 static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
606 uint32_t svm_flags)
607 {
608 target_ulong next_eip;
609
610 if (s->pe && (s->cpl > s->iopl || s->vm86)) {
611 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
612 switch (ot) {
613 case MO_8:
614 gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
615 break;
616 case MO_16:
617 gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
618 break;
619 case MO_32:
620 gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
621 break;
622 default:
623 tcg_abort();
624 }
625 }
626 if (s->flags & HF_SVMI_MASK) {
627 gen_update_cc_op(s);
628 gen_jmp_im(cur_eip);
629 svm_flags |= (1 << (4 + ot));
630 next_eip = s->pc - s->cs_base;
631 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
632 gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
633 tcg_const_i32(svm_flags),
634 tcg_const_i32(next_eip - cur_eip));
635 }
636 }
637
638 static inline void gen_movs(DisasContext *s, TCGMemOp ot)
639 {
640 gen_string_movl_A0_ESI(s);
641 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
642 gen_string_movl_A0_EDI(s);
643 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
644 gen_op_movl_T0_Dshift(ot);
645 gen_op_add_reg_T0(s->aflag, R_ESI);
646 gen_op_add_reg_T0(s->aflag, R_EDI);
647 }
648
649 static void gen_op_update1_cc(void)
650 {
651 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
652 }
653
654 static void gen_op_update2_cc(void)
655 {
656 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
657 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
658 }
659
660 static void gen_op_update3_cc(TCGv reg)
661 {
662 tcg_gen_mov_tl(cpu_cc_src2, reg);
663 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
664 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
665 }
666
667 static inline void gen_op_testl_T0_T1_cc(void)
668 {
669 tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
670 }
671
672 static void gen_op_update_neg_cc(void)
673 {
674 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
675 tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
676 tcg_gen_movi_tl(cpu_cc_srcT, 0);
677 }
678
679 /* compute all eflags to cc_src */
680 static void gen_compute_eflags(DisasContext *s)
681 {
682 TCGv zero, dst, src1, src2;
683 int live, dead;
684
685 if (s->cc_op == CC_OP_EFLAGS) {
686 return;
687 }
688 if (s->cc_op == CC_OP_CLR) {
689 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
690 set_cc_op(s, CC_OP_EFLAGS);
691 return;
692 }
693
694 TCGV_UNUSED(zero);
695 dst = cpu_cc_dst;
696 src1 = cpu_cc_src;
697 src2 = cpu_cc_src2;
698
699 /* Take care to not read values that are not live. */
700 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
701 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
702 if (dead) {
703 zero = tcg_const_tl(0);
704 if (dead & USES_CC_DST) {
705 dst = zero;
706 }
707 if (dead & USES_CC_SRC) {
708 src1 = zero;
709 }
710 if (dead & USES_CC_SRC2) {
711 src2 = zero;
712 }
713 }
714
715 gen_update_cc_op(s);
716 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
717 set_cc_op(s, CC_OP_EFLAGS);
718
719 if (dead) {
720 tcg_temp_free(zero);
721 }
722 }
723
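/* Deferred description of a condition test: the condition holds when COND
   compares (REG, optionally masked with MASK) against IMM, or against REG2
   when USE_REG2 is set.  NO_SETCOND means the value in REG is used directly
   rather than via a setcond operation. */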
724 typedef struct CCPrepare {
725 TCGCond cond;
726 TCGv reg;
727 TCGv reg2;
728 target_ulong imm;
729 target_ulong mask;
730 bool use_reg2;
731 bool no_setcond;
732 } CCPrepare;
733
734 /* compute eflags.C to reg */
735 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
736 {
737 TCGv t0, t1;
738 int size, shift;
739
740 switch (s->cc_op) {
741 case CC_OP_SUBB ... CC_OP_SUBQ:
742 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
743 size = s->cc_op - CC_OP_SUBB;
744 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
745 /* If no temporary was used, be careful not to alias t1 and t0. */
746 t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
747 tcg_gen_mov_tl(t0, cpu_cc_srcT);
748 gen_extu(size, t0);
749 goto add_sub;
750
751 case CC_OP_ADDB ... CC_OP_ADDQ:
752 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
753 size = s->cc_op - CC_OP_ADDB;
754 t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
755 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
756 add_sub:
757 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
758 .reg2 = t1, .mask = -1, .use_reg2 = true };
759
760 case CC_OP_LOGICB ... CC_OP_LOGICQ:
761 case CC_OP_CLR:
762 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
763
764 case CC_OP_INCB ... CC_OP_INCQ:
765 case CC_OP_DECB ... CC_OP_DECQ:
766 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
767 .mask = -1, .no_setcond = true };
768
769 case CC_OP_SHLB ... CC_OP_SHLQ:
770 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
771 size = s->cc_op - CC_OP_SHLB;
772 shift = (8 << size) - 1;
773 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
774 .mask = (target_ulong)1 << shift };
775
776 case CC_OP_MULB ... CC_OP_MULQ:
777 return (CCPrepare) { .cond = TCG_COND_NE,
778 .reg = cpu_cc_src, .mask = -1 };
779
780 case CC_OP_BMILGB ... CC_OP_BMILGQ:
781 size = s->cc_op - CC_OP_BMILGB;
782 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
783 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
784
785 case CC_OP_ADCX:
786 case CC_OP_ADCOX:
787 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
788 .mask = -1, .no_setcond = true };
789
790 case CC_OP_EFLAGS:
791 case CC_OP_SARB ... CC_OP_SARQ:
792 /* CC_SRC & 1 */
793 return (CCPrepare) { .cond = TCG_COND_NE,
794 .reg = cpu_cc_src, .mask = CC_C };
795
796 default:
797 /* The need to compute only C from CC_OP_DYNAMIC is important
798 in efficiently implementing e.g. INC at the start of a TB. */
799 gen_update_cc_op(s);
800 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
801 cpu_cc_src2, cpu_cc_op);
802 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
803 .mask = -1, .no_setcond = true };
804 }
805 }
806
807 /* compute eflags.P to reg */
808 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
809 {
810 gen_compute_eflags(s);
811 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
812 .mask = CC_P };
813 }
814
815 /* compute eflags.S to reg */
816 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
817 {
818 switch (s->cc_op) {
819 case CC_OP_DYNAMIC:
820 gen_compute_eflags(s);
821 /* FALLTHRU */
822 case CC_OP_EFLAGS:
823 case CC_OP_ADCX:
824 case CC_OP_ADOX:
825 case CC_OP_ADCOX:
826 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
827 .mask = CC_S };
828 case CC_OP_CLR:
829 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
830 default:
831 {
832 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
833 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
834 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
835 }
836 }
837 }
838
839 /* compute eflags.O to reg */
840 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
841 {
842 switch (s->cc_op) {
843 case CC_OP_ADOX:
844 case CC_OP_ADCOX:
845 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
846 .mask = -1, .no_setcond = true };
847 case CC_OP_CLR:
848 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
849 default:
850 gen_compute_eflags(s);
851 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
852 .mask = CC_O };
853 }
854 }
855
856 /* compute eflags.Z to reg */
857 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
858 {
859 switch (s->cc_op) {
860 case CC_OP_DYNAMIC:
861 gen_compute_eflags(s);
862 /* FALLTHRU */
863 case CC_OP_EFLAGS:
864 case CC_OP_ADCX:
865 case CC_OP_ADOX:
866 case CC_OP_ADCOX:
867 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
868 .mask = CC_Z };
869 case CC_OP_CLR:
870 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
871 default:
872 {
873 TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
874 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
875 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
876 }
877 }
878 }
879
880 /* perform a conditional store into register 'reg' according to jump opcode
881 value 'b'. In the fast case, T0 is guaranteed not to be used. */
882 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
883 {
884 int inv, jcc_op, cond;
885 TCGMemOp size;
886 CCPrepare cc;
887 TCGv t0;
888
889 inv = b & 1;
890 jcc_op = (b >> 1) & 7;
891
892 switch (s->cc_op) {
893 case CC_OP_SUBB ... CC_OP_SUBQ:
894 /* We optimize relational operators for the cmp/jcc case. */
895 size = s->cc_op - CC_OP_SUBB;
896 switch (jcc_op) {
897 case JCC_BE:
898 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
899 gen_extu(size, cpu_tmp4);
900 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
901 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
902 .reg2 = t0, .mask = -1, .use_reg2 = true };
903 break;
904
905 case JCC_L:
906 cond = TCG_COND_LT;
907 goto fast_jcc_l;
908 case JCC_LE:
909 cond = TCG_COND_LE;
910 fast_jcc_l:
911 tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
912 gen_exts(size, cpu_tmp4);
913 t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
914 cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
915 .reg2 = t0, .mask = -1, .use_reg2 = true };
916 break;
917
918 default:
919 goto slow_jcc;
920 }
921 break;
922
923 default:
924 slow_jcc:
925 /* This actually generates good code for JC, JZ and JS. */
926 switch (jcc_op) {
927 case JCC_O:
928 cc = gen_prepare_eflags_o(s, reg);
929 break;
930 case JCC_B:
931 cc = gen_prepare_eflags_c(s, reg);
932 break;
933 case JCC_Z:
934 cc = gen_prepare_eflags_z(s, reg);
935 break;
936 case JCC_BE:
937 gen_compute_eflags(s);
938 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
939 .mask = CC_Z | CC_C };
940 break;
941 case JCC_S:
942 cc = gen_prepare_eflags_s(s, reg);
943 break;
944 case JCC_P:
945 cc = gen_prepare_eflags_p(s, reg);
946 break;
947 case JCC_L:
948 gen_compute_eflags(s);
949 if (TCGV_EQUAL(reg, cpu_cc_src)) {
950 reg = cpu_tmp0;
951 }
952 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
953 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
954 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
955 .mask = CC_S };
956 break;
957 default:
958 case JCC_LE:
959 gen_compute_eflags(s);
960 if (TCGV_EQUAL(reg, cpu_cc_src)) {
961 reg = cpu_tmp0;
962 }
963 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
964 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
965 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
966 .mask = CC_S | CC_Z };
967 break;
968 }
969 break;
970 }
971
972 if (inv) {
973 cc.cond = tcg_invert_cond(cc.cond);
974 }
975 return cc;
976 }
977
978 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
979 {
980 CCPrepare cc = gen_prepare_cc(s, b, reg);
981
982 if (cc.no_setcond) {
983 if (cc.cond == TCG_COND_EQ) {
984 tcg_gen_xori_tl(reg, cc.reg, 1);
985 } else {
986 tcg_gen_mov_tl(reg, cc.reg);
987 }
988 return;
989 }
990
991 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
992 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
993 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
994 tcg_gen_andi_tl(reg, reg, 1);
995 return;
996 }
997 if (cc.mask != -1) {
998 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
999 cc.reg = reg;
1000 }
1001 if (cc.use_reg2) {
1002 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1003 } else {
1004 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1005 }
1006 }
1007
1008 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1009 {
1010 gen_setcc1(s, JCC_B << 1, reg);
1011 }
1012
1013 /* generate a conditional jump to label 'l1' according to jump opcode
1014 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1015 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1016 {
1017 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1018
1019 if (cc.mask != -1) {
1020 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1021 cc.reg = cpu_T0;
1022 }
1023 if (cc.use_reg2) {
1024 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1025 } else {
1026 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1027 }
1028 }
1029
1030 /* Generate a conditional jump to label 'l1' according to jump opcode
1031 value 'b'. In the fast case, T0 is guaranteed not to be used.
1032 A translation block must end soon. */
1033 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1034 {
1035 CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);
1036
1037 gen_update_cc_op(s);
1038 if (cc.mask != -1) {
1039 tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
1040 cc.reg = cpu_T0;
1041 }
1042 set_cc_op(s, CC_OP_DYNAMIC);
1043 if (cc.use_reg2) {
1044 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1045 } else {
1046 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1047 }
1048 }
1049
1050 /* XXX: does not work with gdbstub "ice" single step - not a
1051 serious problem */
1052 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1053 {
1054 TCGLabel *l1 = gen_new_label();
1055 TCGLabel *l2 = gen_new_label();
1056 gen_op_jnz_ecx(s->aflag, l1);
1057 gen_set_label(l2);
1058 gen_jmp_tb(s, next_eip, 1);
1059 gen_set_label(l1);
1060 return l2;
1061 }
1062
1063 static inline void gen_stos(DisasContext *s, TCGMemOp ot)
1064 {
1065 gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
1066 gen_string_movl_A0_EDI(s);
1067 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1068 gen_op_movl_T0_Dshift(ot);
1069 gen_op_add_reg_T0(s->aflag, R_EDI);
1070 }
1071
1072 static inline void gen_lods(DisasContext *s, TCGMemOp ot)
1073 {
1074 gen_string_movl_A0_ESI(s);
1075 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1076 gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
1077 gen_op_movl_T0_Dshift(ot);
1078 gen_op_add_reg_T0(s->aflag, R_ESI);
1079 }
1080
1081 static inline void gen_scas(DisasContext *s, TCGMemOp ot)
1082 {
1083 gen_string_movl_A0_EDI(s);
1084 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1085 gen_op(s, OP_CMPL, ot, R_EAX);
1086 gen_op_movl_T0_Dshift(ot);
1087 gen_op_add_reg_T0(s->aflag, R_EDI);
1088 }
1089
1090 static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
1091 {
1092 gen_string_movl_A0_EDI(s);
1093 gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
1094 gen_string_movl_A0_ESI(s);
1095 gen_op(s, OP_CMPL, ot, OR_TMP0);
1096 gen_op_movl_T0_Dshift(ot);
1097 gen_op_add_reg_T0(s->aflag, R_ESI);
1098 gen_op_add_reg_T0(s->aflag, R_EDI);
1099 }
1100
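/* If I/O breakpoint checking is enabled (HF_IOBPT_MASK), call the helper
   after an I/O access of size 1 << ot so a matching breakpoint on the port
   can be reported. */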
1101 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1102 {
1103 if (s->flags & HF_IOBPT_MASK) {
1104 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1105 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1106
1107 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1108 tcg_temp_free_i32(t_size);
1109 tcg_temp_free(t_next);
1110 }
1111 }
1112
1113
1114 static inline void gen_ins(DisasContext *s, TCGMemOp ot)
1115 {
1116 if (s->tb->cflags & CF_USE_ICOUNT) {
1117 gen_io_start();
1118 }
1119 gen_string_movl_A0_EDI(s);
1120 /* Note: we must do this dummy write first to be restartable in
1121 case of page fault. */
1122 tcg_gen_movi_tl(cpu_T0, 0);
1123 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1124 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1125 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1126 gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
1127 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
1128 gen_op_movl_T0_Dshift(ot);
1129 gen_op_add_reg_T0(s->aflag, R_EDI);
1130 gen_bpt_io(s, cpu_tmp2_i32, ot);
1131 if (s->tb->cflags & CF_USE_ICOUNT) {
1132 gen_io_end();
1133 }
1134 }
1135
1136 static inline void gen_outs(DisasContext *s, TCGMemOp ot)
1137 {
1138 if (s->tb->cflags & CF_USE_ICOUNT) {
1139 gen_io_start();
1140 }
1141 gen_string_movl_A0_ESI(s);
1142 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1143
1144 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
1145 tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
1146 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
1147 gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
1148 gen_op_movl_T0_Dshift(ot);
1149 gen_op_add_reg_T0(s->aflag, R_ESI);
1150 gen_bpt_io(s, cpu_tmp2_i32, ot);
1151 if (s->tb->cflags & CF_USE_ICOUNT) {
1152 gen_io_end();
1153 }
1154 }
1155
1156 /* same method as Valgrind: we generate jumps to the current or next
1157 instruction */
1158 #define GEN_REPZ(op) \
1159 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1160 target_ulong cur_eip, target_ulong next_eip) \
1161 { \
1162 TCGLabel *l2; \
1163 gen_update_cc_op(s); \
1164 l2 = gen_jz_ecx_string(s, next_eip); \
1165 gen_ ## op(s, ot); \
1166 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1167 /* a loop would cause two single step exceptions if ECX = 1 \
1168 before rep string_insn */ \
1169 if (s->repz_opt) \
1170 gen_op_jz_ecx(s->aflag, l2); \
1171 gen_jmp(s, cur_eip); \
1172 }
1173
1174 #define GEN_REPZ2(op) \
1175 static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \
1176 target_ulong cur_eip, \
1177 target_ulong next_eip, \
1178 int nz) \
1179 { \
1180 TCGLabel *l2; \
1181 gen_update_cc_op(s); \
1182 l2 = gen_jz_ecx_string(s, next_eip); \
1183 gen_ ## op(s, ot); \
1184 gen_op_add_reg_im(s->aflag, R_ECX, -1); \
1185 gen_update_cc_op(s); \
1186 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1187 if (s->repz_opt) \
1188 gen_op_jz_ecx(s->aflag, l2); \
1189 gen_jmp(s, cur_eip); \
1190 }
1191
1192 GEN_REPZ(movs)
1193 GEN_REPZ(stos)
1194 GEN_REPZ(lods)
1195 GEN_REPZ(ins)
1196 GEN_REPZ(outs)
1197 GEN_REPZ2(scas)
1198 GEN_REPZ2(cmps)
1199
1200 static void gen_helper_fp_arith_ST0_FT0(int op)
1201 {
1202 switch (op) {
1203 case 0:
1204 gen_helper_fadd_ST0_FT0(cpu_env);
1205 break;
1206 case 1:
1207 gen_helper_fmul_ST0_FT0(cpu_env);
1208 break;
1209 case 2:
1210 gen_helper_fcom_ST0_FT0(cpu_env);
1211 break;
1212 case 3:
1213 gen_helper_fcom_ST0_FT0(cpu_env);
1214 break;
1215 case 4:
1216 gen_helper_fsub_ST0_FT0(cpu_env);
1217 break;
1218 case 5:
1219 gen_helper_fsubr_ST0_FT0(cpu_env);
1220 break;
1221 case 6:
1222 gen_helper_fdiv_ST0_FT0(cpu_env);
1223 break;
1224 case 7:
1225 gen_helper_fdivr_ST0_FT0(cpu_env);
1226 break;
1227 }
1228 }
1229
1230 /* NOTE the exception in "r" op ordering */
1231 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1232 {
1233 TCGv_i32 tmp = tcg_const_i32(opreg);
1234 switch (op) {
1235 case 0:
1236 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1237 break;
1238 case 1:
1239 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1240 break;
1241 case 4:
1242 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1243 break;
1244 case 5:
1245 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1246 break;
1247 case 6:
1248 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1249 break;
1250 case 7:
1251 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1252 break;
1253 }
1254 }
1255
1256 /* if d == OR_TMP0, it means memory operand (address in A0) */
1257 static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
1258 {
1259 if (d != OR_TMP0) {
1260 gen_op_mov_v_reg(ot, cpu_T0, d);
1261 } else {
1262 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1263 }
1264 switch (op) {
1265 case OP_ADCL:
1266 gen_compute_eflags_c(s1, cpu_tmp4);
1267 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1268 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
1269 gen_op_st_rm_T0_A0(s1, ot, d);
1270 gen_op_update3_cc(cpu_tmp4);
1271 set_cc_op(s1, CC_OP_ADCB + ot);
1272 break;
1273 case OP_SBBL:
1274 gen_compute_eflags_c(s1, cpu_tmp4);
1275 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1276 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
1277 gen_op_st_rm_T0_A0(s1, ot, d);
1278 gen_op_update3_cc(cpu_tmp4);
1279 set_cc_op(s1, CC_OP_SBBB + ot);
1280 break;
1281 case OP_ADDL:
1282 tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
1283 gen_op_st_rm_T0_A0(s1, ot, d);
1284 gen_op_update2_cc();
1285 set_cc_op(s1, CC_OP_ADDB + ot);
1286 break;
1287 case OP_SUBL:
1288 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1289 tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
1290 gen_op_st_rm_T0_A0(s1, ot, d);
1291 gen_op_update2_cc();
1292 set_cc_op(s1, CC_OP_SUBB + ot);
1293 break;
1294 default:
1295 case OP_ANDL:
1296 tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
1297 gen_op_st_rm_T0_A0(s1, ot, d);
1298 gen_op_update1_cc();
1299 set_cc_op(s1, CC_OP_LOGICB + ot);
1300 break;
1301 case OP_ORL:
1302 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1303 gen_op_st_rm_T0_A0(s1, ot, d);
1304 gen_op_update1_cc();
1305 set_cc_op(s1, CC_OP_LOGICB + ot);
1306 break;
1307 case OP_XORL:
1308 tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
1309 gen_op_st_rm_T0_A0(s1, ot, d);
1310 gen_op_update1_cc();
1311 set_cc_op(s1, CC_OP_LOGICB + ot);
1312 break;
1313 case OP_CMPL:
1314 tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
1315 tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
1316 tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
1317 set_cc_op(s1, CC_OP_SUBB + ot);
1318 break;
1319 }
1320 }
1321
1322 /* if d == OR_TMP0, it means memory operand (address in A0) */
1323 static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
1324 {
1325 if (d != OR_TMP0) {
1326 gen_op_mov_v_reg(ot, cpu_T0, d);
1327 } else {
1328 gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
1329 }
1330 gen_compute_eflags_c(s1, cpu_cc_src);
1331 if (c > 0) {
1332 tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
1333 set_cc_op(s1, CC_OP_INCB + ot);
1334 } else {
1335 tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
1336 set_cc_op(s1, CC_OP_DECB + ot);
1337 }
1338 gen_op_st_rm_T0_A0(s1, ot, d);
1339 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1340 }
1341
1342 static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
1343 TCGv shm1, TCGv count, bool is_right)
1344 {
1345 TCGv_i32 z32, s32, oldop;
1346 TCGv z_tl;
1347
1348 /* Store the results into the CC variables. If we know that the
1349 variable must be dead, store unconditionally. Otherwise we'll
1350 need to not disrupt the current contents. */
1351 z_tl = tcg_const_tl(0);
1352 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1353 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1354 result, cpu_cc_dst);
1355 } else {
1356 tcg_gen_mov_tl(cpu_cc_dst, result);
1357 }
1358 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1359 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1360 shm1, cpu_cc_src);
1361 } else {
1362 tcg_gen_mov_tl(cpu_cc_src, shm1);
1363 }
1364 tcg_temp_free(z_tl);
1365
1366 /* Get the two potential CC_OP values into temporaries. */
1367 tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1368 if (s->cc_op == CC_OP_DYNAMIC) {
1369 oldop = cpu_cc_op;
1370 } else {
1371 tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
1372 oldop = cpu_tmp3_i32;
1373 }
1374
1375 /* Conditionally store the CC_OP value. */
1376 z32 = tcg_const_i32(0);
1377 s32 = tcg_temp_new_i32();
1378 tcg_gen_trunc_tl_i32(s32, count);
1379 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
1380 tcg_temp_free_i32(z32);
1381 tcg_temp_free_i32(s32);
1382
1383 /* The CC_OP value is no longer predictable. */
1384 set_cc_op(s, CC_OP_DYNAMIC);
1385 }
1386
1387 static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1388 int is_right, int is_arith)
1389 {
1390 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1391
1392 /* load */
1393 if (op1 == OR_TMP0) {
1394 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1395 } else {
1396 gen_op_mov_v_reg(ot, cpu_T0, op1);
1397 }
1398
1399 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1400 tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);
1401
1402 if (is_right) {
1403 if (is_arith) {
1404 gen_exts(ot, cpu_T0);
1405 tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1406 tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
1407 } else {
1408 gen_extu(ot, cpu_T0);
1409 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1410 tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
1411 }
1412 } else {
1413 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1414 tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
1415 }
1416
1417 /* store */
1418 gen_op_st_rm_T0_A0(s, ot, op1);
1419
1420 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
1421 }
1422
1423 static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1424 int is_right, int is_arith)
1425 {
1426 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1427
1428 /* load */
1429 if (op1 == OR_TMP0)
1430 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1431 else
1432 gen_op_mov_v_reg(ot, cpu_T0, op1);
1433
1434 op2 &= mask;
1435 if (op2 != 0) {
1436 if (is_right) {
1437 if (is_arith) {
1438 gen_exts(ot, cpu_T0);
1439 tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
1440 tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
1441 } else {
1442 gen_extu(ot, cpu_T0);
1443 tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
1444 tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
1445 }
1446 } else {
1447 tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
1448 tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
1449 }
1450 }
1451
1452 /* store */
1453 gen_op_st_rm_T0_A0(s, ot, op1);
1454
1455 /* update eflags if non-zero shift */
1456 if (op2 != 0) {
1457 tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
1458 tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
1459 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1460 }
1461 }
1462
1463 static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
1464 {
1465 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1466 TCGv_i32 t0, t1;
1467
1468 /* load */
1469 if (op1 == OR_TMP0) {
1470 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1471 } else {
1472 gen_op_mov_v_reg(ot, cpu_T0, op1);
1473 }
1474
1475 tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
1476
1477 switch (ot) {
1478 case MO_8:
1479 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1480 tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
1481 tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
1482 goto do_long;
1483 case MO_16:
1484 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1485 tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
1486 goto do_long;
1487 do_long:
1488 #ifdef TARGET_X86_64
1489 case MO_32:
1490 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1491 tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
1492 if (is_right) {
1493 tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1494 } else {
1495 tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
1496 }
1497 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1498 break;
1499 #endif
1500 default:
1501 if (is_right) {
1502 tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
1503 } else {
1504 tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
1505 }
1506 break;
1507 }
1508
1509 /* store */
1510 gen_op_st_rm_T0_A0(s, ot, op1);
1511
1512 /* We'll need the flags computed into CC_SRC. */
1513 gen_compute_eflags(s);
1514
1515 /* The value that was "rotated out" is now present at the other end
1516 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1517 since we've computed the flags into CC_SRC, these variables are
1518 currently dead. */
1519 if (is_right) {
1520 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1521 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1522 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1523 } else {
1524 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1525 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1526 }
1527 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1528 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1529
1530 /* Now conditionally store the new CC_OP value. If the shift count
1531 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1532 Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
1533 exactly as we computed above. */
1534 t0 = tcg_const_i32(0);
1535 t1 = tcg_temp_new_i32();
1536 tcg_gen_trunc_tl_i32(t1, cpu_T1);
1537 tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
1538 tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
1539 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1540 cpu_tmp2_i32, cpu_tmp3_i32);
1541 tcg_temp_free_i32(t0);
1542 tcg_temp_free_i32(t1);
1543
1544 /* The CC_OP value is no longer predictable. */
1545 set_cc_op(s, CC_OP_DYNAMIC);
1546 }
1547
1548 static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1549 int is_right)
1550 {
1551 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1552 int shift;
1553
1554 /* load */
1555 if (op1 == OR_TMP0) {
1556 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1557 } else {
1558 gen_op_mov_v_reg(ot, cpu_T0, op1);
1559 }
1560
1561 op2 &= mask;
1562 if (op2 != 0) {
1563 switch (ot) {
1564 #ifdef TARGET_X86_64
1565 case MO_32:
1566 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1567 if (is_right) {
1568 tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1569 } else {
1570 tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1571 }
1572 tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1573 break;
1574 #endif
1575 default:
1576 if (is_right) {
1577 tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1578 } else {
1579 tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1580 }
1581 break;
1582 case MO_8:
1583 mask = 7;
1584 goto do_shifts;
1585 case MO_16:
1586 mask = 15;
1587 do_shifts:
1588 shift = op2 & mask;
1589 if (is_right) {
1590 shift = mask + 1 - shift;
1591 }
1592 gen_extu(ot, cpu_T0);
1593 tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1594 tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1595 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1596 break;
1597 }
1598 }
1599
1600 /* store */
1601 gen_op_st_rm_T0_A0(s, ot, op1);
1602
1603 if (op2 != 0) {
1604 /* Compute the flags into CC_SRC. */
1605 gen_compute_eflags(s);
1606
1607 /* The value that was "rotated out" is now present at the other end
1608 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1609 since we've computed the flags into CC_SRC, these variables are
1610 currently dead. */
1611 if (is_right) {
1612 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1613 tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1614 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1615 } else {
1616 tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1617 tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1618 }
1619 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1620 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1621 set_cc_op(s, CC_OP_ADCOX);
1622 }
1623 }
1624
1625 /* XXX: add faster immediate = 1 case */
1626 static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1627 int is_right)
1628 {
1629 gen_compute_eflags(s);
1630 assert(s->cc_op == CC_OP_EFLAGS);
1631
1632 /* load */
1633 if (op1 == OR_TMP0)
1634 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1635 else
1636 gen_op_mov_v_reg(ot, cpu_T0, op1);
1637
1638 if (is_right) {
1639 switch (ot) {
1640 case MO_8:
1641 gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1642 break;
1643 case MO_16:
1644 gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1645 break;
1646 case MO_32:
1647 gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1648 break;
1649 #ifdef TARGET_X86_64
1650 case MO_64:
1651 gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1652 break;
1653 #endif
1654 default:
1655 tcg_abort();
1656 }
1657 } else {
1658 switch (ot) {
1659 case MO_8:
1660 gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1661 break;
1662 case MO_16:
1663 gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1664 break;
1665 case MO_32:
1666 gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1667 break;
1668 #ifdef TARGET_X86_64
1669 case MO_64:
1670 gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1671 break;
1672 #endif
1673 default:
1674 tcg_abort();
1675 }
1676 }
1677 /* store */
1678 gen_op_st_rm_T0_A0(s, ot, op1);
1679 }
1680
1681 /* XXX: add faster immediate case */
1682 static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1683 bool is_right, TCGv count_in)
1684 {
1685 target_ulong mask = (ot == MO_64 ? 63 : 31);
1686 TCGv count;
1687
1688 /* load */
1689 if (op1 == OR_TMP0) {
1690 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1691 } else {
1692 gen_op_mov_v_reg(ot, cpu_T0, op1);
1693 }
1694
1695 count = tcg_temp_new();
1696 tcg_gen_andi_tl(count, count_in, mask);
1697
1698 switch (ot) {
1699 case MO_16:
1700 /* Note: we implement the Intel behaviour for shift count > 16.
1701 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1702 portion by constructing it as a 32-bit value. */
1703 if (is_right) {
1704 tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1705 tcg_gen_mov_tl(cpu_T1, cpu_T0);
1706 tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1707 } else {
1708 tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1709 }
1710 /* FALLTHRU */
1711 #ifdef TARGET_X86_64
1712 case MO_32:
1713 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1714 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1715 if (is_right) {
1716 tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1717 tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1718 tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1719 } else {
1720 tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1721 tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1722 tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1723 tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1724 tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1725 }
1726 break;
1727 #endif
1728 default:
1729 tcg_gen_subi_tl(cpu_tmp0, count, 1);
1730 if (is_right) {
1731 tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1732
1733 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1734 tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1735 tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1736 } else {
1737 tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1738 if (ot == MO_16) {
1739 /* Only needed if count > 16, for Intel behaviour. */
1740 tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1741 tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1742 tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1743 }
1744
1745 tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1746 tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1747 tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1748 }
1749 tcg_gen_movi_tl(cpu_tmp4, 0);
1750 tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1751 cpu_tmp4, cpu_T1);
1752 tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1753 break;
1754 }
1755
1756 /* store */
1757 gen_op_st_rm_T0_A0(s, ot, op1);
1758
1759 gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1760 tcg_temp_free(count);
1761 }
1762
1763 static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1764 {
1765 if (s != OR_TMP1)
1766 gen_op_mov_v_reg(ot, cpu_T1, s);
1767 switch (op) {
1768 case OP_ROL:
1769 gen_rot_rm_T1(s1, ot, d, 0);
1770 break;
1771 case OP_ROR:
1772 gen_rot_rm_T1(s1, ot, d, 1);
1773 break;
1774 case OP_SHL:
1775 case OP_SHL1:
1776 gen_shift_rm_T1(s1, ot, d, 0, 0);
1777 break;
1778 case OP_SHR:
1779 gen_shift_rm_T1(s1, ot, d, 1, 0);
1780 break;
1781 case OP_SAR:
1782 gen_shift_rm_T1(s1, ot, d, 1, 1);
1783 break;
1784 case OP_RCL:
1785 gen_rotc_rm_T1(s1, ot, d, 0);
1786 break;
1787 case OP_RCR:
1788 gen_rotc_rm_T1(s1, ot, d, 1);
1789 break;
1790 }
1791 }
1792
1793 static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1794 {
1795 switch (op) {
1796 case OP_ROL:
1797 gen_rot_rm_im(s1, ot, d, c, 0);
1798 break;
1799 case OP_ROR:
1800 gen_rot_rm_im(s1, ot, d, c, 1);
1801 break;
1802 case OP_SHL:
1803 case OP_SHL1:
1804 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1805 break;
1806 case OP_SHR:
1807 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1808 break;
1809 case OP_SAR:
1810 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1811 break;
1812 default:
1813 /* currently not optimized */
1814 tcg_gen_movi_tl(cpu_T1, c);
1815 gen_shift(s1, op, ot, d, OR_TMP1);
1816 break;
1817 }
1818 }
1819
1820 /* Decompose an address. */
1821
1822 typedef struct AddressParts {
1823 int def_seg;
1824 int base;
1825 int index;
1826 int scale;
1827 target_long disp;
1828 } AddressParts;
1829
1830 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1831 int modrm)
1832 {
1833 int def_seg, base, index, scale, mod, rm;
1834 target_long disp;
1835 bool havesib;
1836
1837 def_seg = R_DS;
1838 index = -1;
1839 scale = 0;
1840 disp = 0;
1841
1842 mod = (modrm >> 6) & 3;
1843 rm = modrm & 7;
1844 base = rm | REX_B(s);
1845
1846 if (mod == 3) {
1847 /* Normally filtered out earlier, but including this path
1848 simplifies multi-byte nop, as well as bndcl, bndcu, bndcn. */
1849 goto done;
1850 }
1851
1852 switch (s->aflag) {
1853 case MO_64:
1854 case MO_32:
1855 havesib = 0;
1856 if (rm == 4) {
1857 int code = cpu_ldub_code(env, s->pc++);
1858 scale = (code >> 6) & 3;
1859 index = ((code >> 3) & 7) | REX_X(s);
1860 if (index == 4) {
1861 index = -1; /* no index */
1862 }
1863 base = (code & 7) | REX_B(s);
1864 havesib = 1;
1865 }
1866
1867 switch (mod) {
1868 case 0:
1869 if ((base & 7) == 5) {
1870 base = -1;
1871 disp = (int32_t)cpu_ldl_code(env, s->pc);
1872 s->pc += 4;
1873 if (CODE64(s) && !havesib) {
1874 base = -2;
1875 disp += s->pc + s->rip_offset;
1876 }
1877 }
1878 break;
1879 case 1:
1880 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1881 break;
1882 default:
1883 case 2:
1884 disp = (int32_t)cpu_ldl_code(env, s->pc);
1885 s->pc += 4;
1886 break;
1887 }
1888
1889 /* For correct popl handling with esp. */
1890 if (base == R_ESP && s->popl_esp_hack) {
1891 disp += s->popl_esp_hack;
1892 }
1893 if (base == R_EBP || base == R_ESP) {
1894 def_seg = R_SS;
1895 }
1896 break;
1897
1898 case MO_16:
1899 if (mod == 0) {
1900 if (rm == 6) {
1901 base = -1;
1902 disp = cpu_lduw_code(env, s->pc);
1903 s->pc += 2;
1904 break;
1905 }
1906 } else if (mod == 1) {
1907 disp = (int8_t)cpu_ldub_code(env, s->pc++);
1908 } else {
1909 disp = (int16_t)cpu_lduw_code(env, s->pc);
1910 s->pc += 2;
1911 }
1912
1913 switch (rm) {
1914 case 0:
1915 base = R_EBX;
1916 index = R_ESI;
1917 break;
1918 case 1:
1919 base = R_EBX;
1920 index = R_EDI;
1921 break;
1922 case 2:
1923 base = R_EBP;
1924 index = R_ESI;
1925 def_seg = R_SS;
1926 break;
1927 case 3:
1928 base = R_EBP;
1929 index = R_EDI;
1930 def_seg = R_SS;
1931 break;
1932 case 4:
1933 base = R_ESI;
1934 break;
1935 case 5:
1936 base = R_EDI;
1937 break;
1938 case 6:
1939 base = R_EBP;
1940 def_seg = R_SS;
1941 break;
1942 default:
1943 case 7:
1944 base = R_EBX;
1945 break;
1946 }
1947 break;
1948
1949 default:
1950 tcg_abort();
1951 }
1952
1953 done:
1954 return (AddressParts){ def_seg, base, index, scale, disp };
1955 }
1956
1957 /* Compute the address, with a minimum number of TCG ops. */
1958 static TCGv gen_lea_modrm_1(AddressParts a)
1959 {
1960 TCGv ea;
1961
1962 TCGV_UNUSED(ea);
1963 if (a.index >= 0) {
1964 if (a.scale == 0) {
1965 ea = cpu_regs[a.index];
1966 } else {
1967 tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
1968 ea = cpu_A0;
1969 }
1970 if (a.base >= 0) {
1971 tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
1972 ea = cpu_A0;
1973 }
1974 } else if (a.base >= 0) {
1975 ea = cpu_regs[a.base];
1976 }
1977 if (TCGV_IS_UNUSED(ea)) {
1978 tcg_gen_movi_tl(cpu_A0, a.disp);
1979 ea = cpu_A0;
1980 } else if (a.disp != 0) {
1981 tcg_gen_addi_tl(cpu_A0, ea, a.disp);
1982 ea = cpu_A0;
1983 }
1984
1985 return ea;
1986 }
1987
1988 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1989 {
1990 AddressParts a = gen_lea_modrm_0(env, s, modrm);
1991 TCGv ea = gen_lea_modrm_1(a);
1992 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
1993 }
1994
1995 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
1996 {
1997 (void)gen_lea_modrm_0(env, s, modrm);
1998 }
1999
2000 /* Used for BNDCL, BNDCU, BNDCN. */
2001 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2002 TCGCond cond, TCGv_i64 bndv)
2003 {
2004 TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2005
2006 tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2007 if (!CODE64(s)) {
2008 tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2009 }
2010 tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2011 tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2012 gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2013 }
2014
2015 /* used for LEA and MOV AX, mem */
2016 static void gen_add_A0_ds_seg(DisasContext *s)
2017 {
2018 gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2019 }
2020
2021 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2022 OR_TMP0 */
2023 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2024 TCGMemOp ot, int reg, int is_store)
2025 {
2026 int mod, rm;
2027
2028 mod = (modrm >> 6) & 3;
2029 rm = (modrm & 7) | REX_B(s);
2030 if (mod == 3) {
2031 if (is_store) {
2032 if (reg != OR_TMP0)
2033 gen_op_mov_v_reg(ot, cpu_T0, reg);
2034 gen_op_mov_reg_v(ot, rm, cpu_T0);
2035 } else {
2036 gen_op_mov_v_reg(ot, cpu_T0, rm);
2037 if (reg != OR_TMP0)
2038 gen_op_mov_reg_v(ot, reg, cpu_T0);
2039 }
2040 } else {
2041 gen_lea_modrm(env, s, modrm);
2042 if (is_store) {
2043 if (reg != OR_TMP0)
2044 gen_op_mov_v_reg(ot, cpu_T0, reg);
2045 gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2046 } else {
2047 gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2048 if (reg != OR_TMP0)
2049 gen_op_mov_reg_v(ot, reg, cpu_T0);
2050 }
2051 }
2052 }
2053
2054 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2055 {
2056 uint32_t ret;
2057
2058 switch (ot) {
2059 case MO_8:
2060 ret = cpu_ldub_code(env, s->pc);
2061 s->pc++;
2062 break;
2063 case MO_16:
2064 ret = cpu_lduw_code(env, s->pc);
2065 s->pc += 2;
2066 break;
2067 case MO_32:
2068 #ifdef TARGET_X86_64
2069 case MO_64:
2070 #endif
2071 ret = cpu_ldl_code(env, s->pc);
2072 s->pc += 4;
2073 break;
2074 default:
2075 tcg_abort();
2076 }
2077 return ret;
2078 }
2079
2080 static inline int insn_const_size(TCGMemOp ot)
2081 {
2082 if (ot <= MO_32) {
2083 return 1 << ot;
2084 } else {
2085 return 4;
2086 }
2087 }
2088
2089 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2090 {
2091 #ifndef CONFIG_USER_ONLY
2092 return (pc & TARGET_PAGE_MASK) == (s->tb->pc & TARGET_PAGE_MASK) ||
2093 (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2094 #else
2095 return true;
2096 #endif
2097 }
2098
2099 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2100 {
2101 target_ulong pc = s->cs_base + eip;
2102
2103 if (use_goto_tb(s, pc)) {
2104 /* jump to same page: we can use a direct jump */
2105 tcg_gen_goto_tb(tb_num);
2106 gen_jmp_im(eip);
2107 tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
2108 } else {
2109 /* jump to another page: currently not optimized */
2110 gen_jmp_im(eip);
2111 gen_eob(s);
2112 }
2113 }
2114
2115 static inline void gen_jcc(DisasContext *s, int b,
2116 target_ulong val, target_ulong next_eip)
2117 {
2118 TCGLabel *l1, *l2;
2119
2120 if (s->jmp_opt) {
2121 l1 = gen_new_label();
2122 gen_jcc1(s, b, l1);
2123
2124 gen_goto_tb(s, 0, next_eip);
2125
2126 gen_set_label(l1);
2127 gen_goto_tb(s, 1, val);
2128 s->is_jmp = DISAS_TB_JUMP;
2129 } else {
2130 l1 = gen_new_label();
2131 l2 = gen_new_label();
2132 gen_jcc1(s, b, l1);
2133
2134 gen_jmp_im(next_eip);
2135 tcg_gen_br(l2);
2136
2137 gen_set_label(l1);
2138 gen_jmp_im(val);
2139 gen_set_label(l2);
2140 gen_eob(s);
2141 }
2142 }
2143
2144 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2145 int modrm, int reg)
2146 {
2147 CCPrepare cc;
2148
2149 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2150
2151 cc = gen_prepare_cc(s, b, cpu_T1);
2152 if (cc.mask != -1) {
2153 TCGv t0 = tcg_temp_new();
2154 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2155 cc.reg = t0;
2156 }
2157 if (!cc.use_reg2) {
2158 cc.reg2 = tcg_const_tl(cc.imm);
2159 }
2160
2161 tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2162 cpu_T0, cpu_regs[reg]);
2163 gen_op_mov_reg_v(ot, reg, cpu_T0);
2164
2165 if (cc.mask != -1) {
2166 tcg_temp_free(cc.reg);
2167 }
2168 if (!cc.use_reg2) {
2169 tcg_temp_free(cc.reg2);
2170 }
2171 }
2172
2173 static inline void gen_op_movl_T0_seg(int seg_reg)
2174 {
2175 tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2176 offsetof(CPUX86State,segs[seg_reg].selector));
2177 }
2178
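/* Real and vm86 mode segment load: the base is simply selector << 4, with
no descriptor fetch or permission checks. */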
2179 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2180 {
2181 tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2182 tcg_gen_st32_tl(cpu_T0, cpu_env,
2183 offsetof(CPUX86State,segs[seg_reg].selector));
2184 tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2185 }
2186
2187 /* move T0 to seg_reg and stop translation if the CPU state may change
2188 as a result. Never call this function with seg_reg == R_CS */
2189 static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2190 {
2191 if (s->pe && !s->vm86) {
2192 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2193 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2194 /* abort translation because the addseg value may change or
2195 because ss32 may change. For R_SS, translation must always
2196 stop because special handling is needed to disable hardware
2197 interrupts for the next instruction */
2198 if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2199 s->is_jmp = DISAS_TB_JUMP;
2200 } else {
2201 gen_op_movl_seg_T0_vm(seg_reg);
2202 if (seg_reg == R_SS)
2203 s->is_jmp = DISAS_TB_JUMP;
2204 }
2205 }
2206
2207 static inline int svm_is_rep(int prefixes)
2208 {
2209 return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2210 }
2211
2212 static inline void
2213 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2214 uint32_t type, uint64_t param)
2215 {
2216 /* no SVM activated; fast case */
2217 if (likely(!(s->flags & HF_SVMI_MASK)))
2218 return;
2219 gen_update_cc_op(s);
2220 gen_jmp_im(pc_start - s->cs_base);
2221 gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2222 tcg_const_i64(param));
2223 }
2224
2225 static inline void
2226 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2227 {
2228 gen_svm_check_intercept_param(s, pc_start, type, 0);
2229 }
2230
2231 static inline void gen_stack_update(DisasContext *s, int addend)
2232 {
2233 gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2234 }
2235
2236 /* Generate a push. It depends on ss32, addseg and dflag. */
2237 static void gen_push_v(DisasContext *s, TCGv val)
2238 {
2239 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2240 TCGMemOp a_ot = mo_stacksize(s);
2241 int size = 1 << d_ot;
2242 TCGv new_esp = cpu_A0;
2243
2244 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2245
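/* When addseg is set, gen_lea_v_seg below rewrites cpu_A0 into a segmented
linear address, so the raw new ESP value is kept in cpu_tmp4 for the final
register writeback. */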
2246 if (!CODE64(s)) {
2247 if (s->addseg) {
2248 new_esp = cpu_tmp4;
2249 tcg_gen_mov_tl(new_esp, cpu_A0);
2250 }
2251 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2252 }
2253
2254 gen_op_st_v(s, d_ot, val, cpu_A0);
2255 gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2256 }
2257
2258 /* A two-step pop (load first, update ESP afterwards) is necessary for
precise exceptions: if the load faults, ESP must keep its old value. */
2259 static TCGMemOp gen_pop_T0(DisasContext *s)
2260 {
2261 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2262
2263 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2264 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2265
2266 return d_ot;
2267 }
2268
2269 static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2270 {
2271 gen_stack_update(s, 1 << ot);
2272 }
2273
2274 static inline void gen_stack_A0(DisasContext *s)
2275 {
2276 gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2277 }
2278
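/* PUSHA: cpu_regs[7 - i] is stored so that EAX (pushed first) ends up at the
highest address and EDI at the lowest. The original ESP value is stored,
since R_ESP is only updated after the loop. */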
2279 static void gen_pusha(DisasContext *s)
2280 {
2281 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2282 TCGMemOp d_ot = s->dflag;
2283 int size = 1 << d_ot;
2284 int i;
2285
2286 for (i = 0; i < 8; i++) {
2287 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2288 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2289 gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2290 }
2291
2292 gen_stack_update(s, -8 * size);
2293 }
2294
2295 static void gen_popa(DisasContext *s)
2296 {
2297 TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2298 TCGMemOp d_ot = s->dflag;
2299 int size = 1 << d_ot;
2300 int i;
2301
2302 for (i = 0; i < 8; i++) {
2303 /* ESP is not reloaded */
2304 if (7 - i == R_ESP) {
2305 continue;
2306 }
2307 tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2308 gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2309 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2310 gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2311 }
2312
2313 gen_stack_update(s, 8 * size);
2314 }
2315
2316 static void gen_enter(DisasContext *s, int esp_addend, int level)
2317 {
2318 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2319 TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2320 int size = 1 << d_ot;
2321
2322 /* Push BP; compute FrameTemp into T1. */
2323 tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2324 gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2325 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2326
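/* The ENTER nesting level is taken modulo 32, as architecturally defined. */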
2327 level &= 31;
2328 if (level != 0) {
2329 int i;
2330
2331 /* Copy level-1 pointers from the previous frame. */
2332 for (i = 1; i < level; ++i) {
2333 tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2334 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2335 gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2336
2337 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2338 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2339 gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2340 }
2341
2342 /* Push the current FrameTemp as the last level. */
2343 tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2344 gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2345 gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2346 }
2347
2348 /* Copy the FrameTemp value to EBP. */
2349 gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2350
2351 /* Compute the final value of ESP. */
2352 tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2353 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2354 }
2355
2356 static void gen_leave(DisasContext *s)
2357 {
2358 TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2359 TCGMemOp a_ot = mo_stacksize(s);
2360
2361 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2362 gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2363
2364 tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2365
2366 gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2367 gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2368 }
2369
2370 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2371 {
2372 gen_update_cc_op(s);
2373 gen_jmp_im(cur_eip);
2374 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2375 s->is_jmp = DISAS_TB_JUMP;
2376 }
2377
2378 /* Generate #UD for the current instruction. The assumption here is that
2379 the instruction is known, but it isn't allowed in the current cpu mode. */
2380 static void gen_illegal_opcode(DisasContext *s)
2381 {
2382 gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2383 }
2384
2385 /* Similarly, except that the assumption here is that we don't decode
2386 the instruction at all -- either a missing opcode, an unimplemented
2387 feature, or just a bogus instruction stream. */
2388 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2389 {
2390 gen_illegal_opcode(s);
2391
2392 if (qemu_loglevel_mask(LOG_UNIMP)) {
2393 target_ulong pc = s->pc_start, end = s->pc;
2394 qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2395 for (; pc < end; ++pc) {
2396 qemu_log(" %02x", cpu_ldub_code(env, pc));
2397 }
2398 qemu_log("\n");
2399 }
2400 }
2401
2402 /* an interrupt is different from an exception because of the
2403 privilege checks */
2404 static void gen_interrupt(DisasContext *s, int intno,
2405 target_ulong cur_eip, target_ulong next_eip)
2406 {
2407 gen_update_cc_op(s);
2408 gen_jmp_im(cur_eip);
2409 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2410 tcg_const_i32(next_eip - cur_eip));
2411 s->is_jmp = DISAS_TB_JUMP;
2412 }
2413
2414 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2415 {
2416 gen_update_cc_op(s);
2417 gen_jmp_im(cur_eip);
2418 gen_helper_debug(cpu_env);
2419 s->is_jmp = DISAS_TB_JUMP;
2420 }
2421
2422 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2423 {
2424 if ((s->flags & mask) == 0) {
2425 TCGv_i32 t = tcg_temp_new_i32();
2426 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2427 tcg_gen_ori_i32(t, t, mask);
2428 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2429 tcg_temp_free_i32(t);
2430 s->flags |= mask;
2431 }
2432 }
2433
2434 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2435 {
2436 if (s->flags & mask) {
2437 TCGv_i32 t = tcg_temp_new_i32();
2438 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2439 tcg_gen_andi_i32(t, t, ~mask);
2440 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2441 tcg_temp_free_i32(t);
2442 s->flags &= ~mask;
2443 }
2444 }
2445
2446 /* Clear BND registers during legacy branches. */
2447 static void gen_bnd_jmp(DisasContext *s)
2448 {
2449 /* Clear the registers only if BND prefix is missing, MPX is enabled,
2450 and if the BNDREGs are known to be in use (non-zero) already.
2451 The helper itself will check BNDPRESERVE at runtime. */
2452 if ((s->prefix & PREFIX_REPNZ) == 0
2453 && (s->flags & HF_MPX_EN_MASK) != 0
2454 && (s->flags & HF_MPX_IU_MASK) != 0) {
2455 gen_helper_bnd_jmp(cpu_env);
2456 }
2457 }
2458
2459 /* Generate an end of block. Trace exception is also generated if needed.
2460 If INHIBIT is set, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
2461 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2462 {
2463 gen_update_cc_op(s);
2464
2465 /* If several instructions disable interrupts, only the first does it. */
2466 if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2467 gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2468 } else {
2469 gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2470 }
2471
2472 if (s->tb->flags & HF_RF_MASK) {
2473 gen_helper_reset_rf(cpu_env);
2474 }
2475 if (s->singlestep_enabled) {
2476 gen_helper_debug(cpu_env);
2477 } else if (s->tf) {
2478 gen_helper_single_step(cpu_env);
2479 } else {
2480 tcg_gen_exit_tb(0);
2481 }
2482 s->is_jmp = DISAS_TB_JUMP;
2483 }
2484
2485 /* End of block, resetting the inhibit irq flag. */
2486 static void gen_eob(DisasContext *s)
2487 {
2488 gen_eob_inhibit_irq(s, false);
2489 }
2490
2491 /* generate a jump to eip. No segment change must happen before this, as a
2492 direct jump to the next block may occur */
2493 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2494 {
2495 gen_update_cc_op(s);
2496 set_cc_op(s, CC_OP_DYNAMIC);
2497 if (s->jmp_opt) {
2498 gen_goto_tb(s, tb_num, eip);
2499 s->is_jmp = DISAS_TB_JUMP;
2500 } else {
2501 gen_jmp_im(eip);
2502 gen_eob(s);
2503 }
2504 }
2505
2506 static void gen_jmp(DisasContext *s, target_ulong eip)
2507 {
2508 gen_jmp_tb(s, eip, 0);
2509 }
2510
2511 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2512 {
2513 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2514 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2515 }
2516
2517 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2518 {
2519 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2520 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2521 }
2522
2523 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2524 {
2525 int mem_index = s->mem_index;
2526 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2527 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2528 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2529 tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2530 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2531 }
2532
2533 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2534 {
2535 int mem_index = s->mem_index;
2536 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2537 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2538 tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2539 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2540 tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2541 }
2542
2543 static inline void gen_op_movo(int d_offset, int s_offset)
2544 {
2545 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2546 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2547 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2548 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2549 }
2550
2551 static inline void gen_op_movq(int d_offset, int s_offset)
2552 {
2553 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2554 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2555 }
2556
2557 static inline void gen_op_movl(int d_offset, int s_offset)
2558 {
2559 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2560 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2561 }
2562
2563 static inline void gen_op_movq_env_0(int d_offset)
2564 {
2565 tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2566 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2567 }
2568
2569 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2570 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2571 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2572 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2573 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2574 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2575 TCGv_i32 val);
2576 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2577 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2578 TCGv val);
2579
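/* SSE_SPECIAL marks opcodes handled by dedicated code in gen_sse() rather
than by a helper; SSE_DUMMY marks opcodes (femms/emms) that only need a
non-NULL table entry to pass the decode check. */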
2580 #define SSE_SPECIAL ((void *)1)
2581 #define SSE_DUMMY ((void *)2)
2582
2583 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2584 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2585 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2586
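/* Indexed by [opcode byte][prefix]: column 0 = no prefix (MMX / ...ps),
1 = 0x66 (...pd), 2 = 0xF3 (...ss), 3 = 0xF2 (...sd), matching the b1 value
computed in gen_sse(). */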
2587 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2588 /* 3DNow! extensions */
2589 [0x0e] = { SSE_DUMMY }, /* femms */
2590 [0x0f] = { SSE_DUMMY }, /* pf... */
2591 /* pure SSE operations */
2592 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2593 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2594 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2595 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2596 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2597 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2598 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2599 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2600
2601 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2602 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2603 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2604 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2605 [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2606 [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2607 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2608 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2609 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2610 [0x51] = SSE_FOP(sqrt),
2611 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2612 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2613 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2614 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2615 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2616 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2617 [0x58] = SSE_FOP(add),
2618 [0x59] = SSE_FOP(mul),
2619 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2620 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2621 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2622 [0x5c] = SSE_FOP(sub),
2623 [0x5d] = SSE_FOP(min),
2624 [0x5e] = SSE_FOP(div),
2625 [0x5f] = SSE_FOP(max),
2626
2627 [0xc2] = SSE_FOP(cmpeq),
2628 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2629 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2630
2631 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2632 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2633 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2634
2635 /* MMX ops and their SSE extensions */
2636 [0x60] = MMX_OP2(punpcklbw),
2637 [0x61] = MMX_OP2(punpcklwd),
2638 [0x62] = MMX_OP2(punpckldq),
2639 [0x63] = MMX_OP2(packsswb),
2640 [0x64] = MMX_OP2(pcmpgtb),
2641 [0x65] = MMX_OP2(pcmpgtw),
2642 [0x66] = MMX_OP2(pcmpgtl),
2643 [0x67] = MMX_OP2(packuswb),
2644 [0x68] = MMX_OP2(punpckhbw),
2645 [0x69] = MMX_OP2(punpckhwd),
2646 [0x6a] = MMX_OP2(punpckhdq),
2647 [0x6b] = MMX_OP2(packssdw),
2648 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2649 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2650 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2651 [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2652 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2653 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2654 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2655 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2656 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2657 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2658 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2659 [0x74] = MMX_OP2(pcmpeqb),
2660 [0x75] = MMX_OP2(pcmpeqw),
2661 [0x76] = MMX_OP2(pcmpeql),
2662 [0x77] = { SSE_DUMMY }, /* emms */
2663 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2664 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2665 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2666 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2667 [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2668 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2669 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2670 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2671 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2672 [0xd1] = MMX_OP2(psrlw),
2673 [0xd2] = MMX_OP2(psrld),
2674 [0xd3] = MMX_OP2(psrlq),
2675 [0xd4] = MMX_OP2(paddq),
2676 [0xd5] = MMX_OP2(pmullw),
2677 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2678 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2679 [0xd8] = MMX_OP2(psubusb),
2680 [0xd9] = MMX_OP2(psubusw),
2681 [0xda] = MMX_OP2(pminub),
2682 [0xdb] = MMX_OP2(pand),
2683 [0xdc] = MMX_OP2(paddusb),
2684 [0xdd] = MMX_OP2(paddusw),
2685 [0xde] = MMX_OP2(pmaxub),
2686 [0xdf] = MMX_OP2(pandn),
2687 [0xe0] = MMX_OP2(pavgb),
2688 [0xe1] = MMX_OP2(psraw),
2689 [0xe2] = MMX_OP2(psrad),
2690 [0xe3] = MMX_OP2(pavgw),
2691 [0xe4] = MMX_OP2(pmulhuw),
2692 [0xe5] = MMX_OP2(pmulhw),
2693 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2694 [0xe7] = { SSE_SPECIAL, SSE_SPECIAL }, /* movntq, movntdq */
2695 [0xe8] = MMX_OP2(psubsb),
2696 [0xe9] = MMX_OP2(psubsw),
2697 [0xea] = MMX_OP2(pminsw),
2698 [0xeb] = MMX_OP2(por),
2699 [0xec] = MMX_OP2(paddsb),
2700 [0xed] = MMX_OP2(paddsw),
2701 [0xee] = MMX_OP2(pmaxsw),
2702 [0xef] = MMX_OP2(pxor),
2703 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2704 [0xf1] = MMX_OP2(psllw),
2705 [0xf2] = MMX_OP2(pslld),
2706 [0xf3] = MMX_OP2(psllq),
2707 [0xf4] = MMX_OP2(pmuludq),
2708 [0xf5] = MMX_OP2(pmaddwd),
2709 [0xf6] = MMX_OP2(psadbw),
2710 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2711 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2712 [0xf8] = MMX_OP2(psubb),
2713 [0xf9] = MMX_OP2(psubw),
2714 [0xfa] = MMX_OP2(psubl),
2715 [0xfb] = MMX_OP2(psubq),
2716 [0xfc] = MMX_OP2(paddb),
2717 [0xfd] = MMX_OP2(paddw),
2718 [0xfe] = MMX_OP2(paddl),
2719 };
2720
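/* Immediate-form shifts (0F 71/72/73 /r ib): rows are grouped 8 per shift
width (word, dword, qword) and indexed by the modrm reg field; the column
selects the MMX or XMM variant. */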
2721 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2722 [0 + 2] = MMX_OP2(psrlw),
2723 [0 + 4] = MMX_OP2(psraw),
2724 [0 + 6] = MMX_OP2(psllw),
2725 [8 + 2] = MMX_OP2(psrld),
2726 [8 + 4] = MMX_OP2(psrad),
2727 [8 + 6] = MMX_OP2(pslld),
2728 [16 + 2] = MMX_OP2(psrlq),
2729 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2730 [16 + 6] = MMX_OP2(psllq),
2731 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2732 };
2733
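/* Scalar int<->float conversions: the 3a tables implement cvtsi2ss/sd (the
*aq variants take 64-bit integers); the 3b tables implement cvt(t)ss2si and
cvt(t)sd2si (the *bq variants return 64-bit integers). */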
2734 static const SSEFunc_0_epi sse_op_table3ai[] = {
2735 gen_helper_cvtsi2ss,
2736 gen_helper_cvtsi2sd
2737 };
2738
2739 #ifdef TARGET_X86_64
2740 static const SSEFunc_0_epl sse_op_table3aq[] = {
2741 gen_helper_cvtsq2ss,
2742 gen_helper_cvtsq2sd
2743 };
2744 #endif
2745
2746 static const SSEFunc_i_ep sse_op_table3bi[] = {
2747 gen_helper_cvttss2si,
2748 gen_helper_cvtss2si,
2749 gen_helper_cvttsd2si,
2750 gen_helper_cvtsd2si
2751 };
2752
2753 #ifdef TARGET_X86_64
2754 static const SSEFunc_l_ep sse_op_table3bq[] = {
2755 gen_helper_cvttss2sq,
2756 gen_helper_cvtss2sq,
2757 gen_helper_cvttsd2sq,
2758 gen_helper_cvtsd2sq
2759 };
2760 #endif
2761
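/* Comparison predicates for cmp{ps,pd,ss,sd}, indexed by the immediate
predicate value (0-7). */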
2762 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2763 SSE_FOP(cmpeq),
2764 SSE_FOP(cmplt),
2765 SSE_FOP(cmple),
2766 SSE_FOP(cmpunord),
2767 SSE_FOP(cmpneq),
2768 SSE_FOP(cmpnlt),
2769 SSE_FOP(cmpnle),
2770 SSE_FOP(cmpord),
2771 };
2772
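/* 3DNow! operations, indexed by the suffix opcode byte that follows the
0F 0F modrm byte. */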
2773 static const SSEFunc_0_epp sse_op_table5[256] = {
2774 [0x0c] = gen_helper_pi2fw,
2775 [0x0d] = gen_helper_pi2fd,
2776 [0x1c] = gen_helper_pf2iw,
2777 [0x1d] = gen_helper_pf2id,
2778 [0x8a] = gen_helper_pfnacc,
2779 [0x8e] = gen_helper_pfpnacc,
2780 [0x90] = gen_helper_pfcmpge,
2781 [0x94] = gen_helper_pfmin,
2782 [0x96] = gen_helper_pfrcp,
2783 [0x97] = gen_helper_pfrsqrt,
2784 [0x9a] = gen_helper_pfsub,
2785 [0x9e] = gen_helper_pfadd,
2786 [0xa0] = gen_helper_pfcmpgt,
2787 [0xa4] = gen_helper_pfmax,
2788 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2789 [0xa7] = gen_helper_movq, /* pfrsqit1 */
2790 [0xaa] = gen_helper_pfsubr,
2791 [0xae] = gen_helper_pfacc,
2792 [0xb0] = gen_helper_pfcmpeq,
2793 [0xb4] = gen_helper_pfmul,
2794 [0xb6] = gen_helper_movq, /* pfrcpit2 */
2795 [0xb7] = gen_helper_pmulhrw_mmx,
2796 [0xbb] = gen_helper_pswapd,
2797 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2798 };
2799
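/* ext_mask is the CPUID.01H:ECX feature bit that must be set for the
opcode to be accepted. */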
2800 struct SSEOpHelper_epp {
2801 SSEFunc_0_epp op[2];
2802 uint32_t ext_mask;
2803 };
2804
2805 struct SSEOpHelper_eppi {
2806 SSEFunc_0_eppi op[2];
2807 uint32_t ext_mask;
2808 };
2809
2810 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2811 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2812 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2813 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2814 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2815 CPUID_EXT_PCLMULQDQ }
2816 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2817
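/* Three-byte opcodes 0F 38 xx (no immediate operand). */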
2818 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2819 [0x00] = SSSE3_OP(pshufb),
2820 [0x01] = SSSE3_OP(phaddw),
2821 [0x02] = SSSE3_OP(phaddd),
2822 [0x03] = SSSE3_OP(phaddsw),
2823 [0x04] = SSSE3_OP(pmaddubsw),
2824 [0x05] = SSSE3_OP(phsubw),
2825 [0x06] = SSSE3_OP(phsubd),
2826 [0x07] = SSSE3_OP(phsubsw),
2827 [0x08] = SSSE3_OP(psignb),
2828 [0x09] = SSSE3_OP(psignw),
2829 [0x0a] = SSSE3_OP(psignd),
2830 [0x0b] = SSSE3_OP(pmulhrsw),
2831 [0x10] = SSE41_OP(pblendvb),
2832 [0x14] = SSE41_OP(blendvps),
2833 [0x15] = SSE41_OP(blendvpd),
2834 [0x17] = SSE41_OP(ptest),
2835 [0x1c] = SSSE3_OP(pabsb),
2836 [0x1d] = SSSE3_OP(pabsw),
2837 [0x1e] = SSSE3_OP(pabsd),
2838 [0x20] = SSE41_OP(pmovsxbw),
2839 [0x21] = SSE41_OP(pmovsxbd),
2840 [0x22] = SSE41_OP(pmovsxbq),
2841 [0x23] = SSE41_OP(pmovsxwd),
2842 [0x24] = SSE41_OP(pmovsxwq),
2843 [0x25] = SSE41_OP(pmovsxdq),
2844 [0x28] = SSE41_OP(pmuldq),
2845 [0x29] = SSE41_OP(pcmpeqq),
2846 [0x2a] = SSE41_SPECIAL, /* movntdqa */
2847 [0x2b] = SSE41_OP(packusdw),
2848 [0x30] = SSE41_OP(pmovzxbw),
2849 [0x31] = SSE41_OP(pmovzxbd),
2850 [0x32] = SSE41_OP(pmovzxbq),
2851 [0x33] = SSE41_OP(pmovzxwd),
2852 [0x34] = SSE41_OP(pmovzxwq),
2853 [0x35] = SSE41_OP(pmovzxdq),
2854 [0x37] = SSE42_OP(pcmpgtq),
2855 [0x38] = SSE41_OP(pminsb),
2856 [0x39] = SSE41_OP(pminsd),
2857 [0x3a] = SSE41_OP(pminuw),
2858 [0x3b] = SSE41_OP(pminud),
2859 [0x3c] = SSE41_OP(pmaxsb),
2860 [0x3d] = SSE41_OP(pmaxsd),
2861 [0x3e] = SSE41_OP(pmaxuw),
2862 [0x3f] = SSE41_OP(pmaxud),
2863 [0x40] = SSE41_OP(pmulld),
2864 [0x41] = SSE41_OP(phminposuw),
2865 [0xdb] = AESNI_OP(aesimc),
2866 [0xdc] = AESNI_OP(aesenc),
2867 [0xdd] = AESNI_OP(aesenclast),
2868 [0xde] = AESNI_OP(aesdec),
2869 [0xdf] = AESNI_OP(aesdeclast),
2870 };
2871
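/* Three-byte opcodes 0F 3A xx, which take a trailing immediate byte. */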
2872 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2873 [0x08] = SSE41_OP(roundps),
2874 [0x09] = SSE41_OP(roundpd),
2875 [0x0a] = SSE41_OP(roundss),
2876 [0x0b] = SSE41_OP(roundsd),
2877 [0x0c] = SSE41_OP(blendps),
2878 [0x0d] = SSE41_OP(blendpd),
2879 [0x0e] = SSE41_OP(pblendw),
2880 [0x0f] = SSSE3_OP(palignr),
2881 [0x14] = SSE41_SPECIAL, /* pextrb */
2882 [0x15] = SSE41_SPECIAL, /* pextrw */
2883 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2884 [0x17] = SSE41_SPECIAL, /* extractps */
2885 [0x20] = SSE41_SPECIAL, /* pinsrb */
2886 [0x21] = SSE41_SPECIAL, /* insertps */
2887 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2888 [0x40] = SSE41_OP(dpps),
2889 [0x41] = SSE41_OP(dppd),
2890 [0x42] = SSE41_OP(mpsadbw),
2891 [0x44] = PCLMULQDQ_OP(pclmulqdq),
2892 [0x60] = SSE42_OP(pcmpestrm),
2893 [0x61] = SSE42_OP(pcmpestri),
2894 [0x62] = SSE42_OP(pcmpistrm),
2895 [0x63] = SSE42_OP(pcmpistri),
2896 [0xdf] = AESNI_OP(aeskeygenassist),
2897 };
2898
2899 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2900 target_ulong pc_start, int rex_r)
2901 {
2902 int b1, op1_offset, op2_offset, is_xmm, val;
2903 int modrm, mod, rm, reg;
2904 SSEFunc_0_epp sse_fn_epp;
2905 SSEFunc_0_eppi sse_fn_eppi;
2906 SSEFunc_0_ppi sse_fn_ppi;
2907 SSEFunc_0_eppt sse_fn_eppt;
2908 TCGMemOp ot;
2909
2910 b &= 0xff;
2911 if (s->prefix & PREFIX_DATA)
2912 b1 = 1;
2913 else if (s->prefix & PREFIX_REPZ)
2914 b1 = 2;
2915 else if (s->prefix & PREFIX_REPNZ)
2916 b1 = 3;
2917 else
2918 b1 = 0;
2919 sse_fn_epp = sse_op_table1[b][b1];
2920 if (!sse_fn_epp) {
2921 goto unknown_op;
2922 }
2923 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2924 is_xmm = 1;
2925 } else {
2926 if (b1 == 0) {
2927 /* MMX case */
2928 is_xmm = 0;
2929 } else {
2930 is_xmm = 1;
2931 }
2932 }
2933 /* simple MMX/SSE operation */
2934 if (s->flags & HF_TS_MASK) {
2935 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2936 return;
2937 }
2938 if (s->flags & HF_EM_MASK) {
2939 illegal_op:
2940 gen_illegal_opcode(s);
2941 return;
2942 }
2943 if (is_xmm
2944 && !(s->flags & HF_OSFXSR_MASK)
2945 && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
2946 goto unknown_op;
2947 }
2948 if (b == 0x0e) {
2949 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
2950 /* If we were fully decoding this we might use illegal_op. */
2951 goto unknown_op;
2952 }
2953 /* femms */
2954 gen_helper_emms(cpu_env);
2955 return;
2956 }
2957 if (b == 0x77) {
2958 /* emms */
2959 gen_helper_emms(cpu_env);
2960 return;
2961 }
2962 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2963 the static cpu state) */
2964 if (!is_xmm) {
2965 gen_helper_enter_mmx(cpu_env);
2966 }
2967
2968 modrm = cpu_ldub_code(env, s->pc++);
2969 reg = ((modrm >> 3) & 7);
2970 if (is_xmm)
2971 reg |= rex_r;
2972 mod = (modrm >> 6) & 3;
2973 if (sse_fn_epp == SSE_SPECIAL) {
2974 b |= (b1 << 8);
2975 switch(b) {
2976 case 0x0e7: /* movntq */
2977 if (mod == 3) {
2978 goto illegal_op;
2979 }
2980 gen_lea_modrm(env, s, modrm);
2981 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2982 break;
2983 case 0x1e7: /* movntdq */
2984 case 0x02b: /* movntps */
2985 case 0x12b: /* movntpd */
2986 if (mod == 3)
2987 goto illegal_op;
2988 gen_lea_modrm(env, s, modrm);
2989 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2990 break;
2991 case 0x3f0: /* lddqu */
2992 if (mod == 3)
2993 goto illegal_op;
2994 gen_lea_modrm(env, s, modrm);
2995 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2996 break;
2997 case 0x22b: /* movntss */
2998 case 0x32b: /* movntsd */
2999 if (mod == 3)
3000 goto illegal_op;
3001 gen_lea_modrm(env, s, modrm);
3002 if (b1 & 1) {
3003 gen_stq_env_A0(s, offsetof(CPUX86State,
3004 xmm_regs[reg].ZMM_Q(0)));
3005 } else {
3006 tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3007 xmm_regs[reg].ZMM_L(0)));
3008 gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3009 }
3010 break;
3011 case 0x6e: /* movd mm, ea */
3012 #ifdef TARGET_X86_64
3013 if (s->dflag == MO_64) {
3014 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3015 tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3016 } else
3017 #endif
3018 {
3019 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3020 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3021 offsetof(CPUX86State,fpregs[reg].mmx));
3022 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3023 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3024 }
3025 break;
3026 case 0x16e: /* movd xmm, ea */
3027 #ifdef TARGET_X86_64
3028 if (s->dflag == MO_64) {
3029 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3030 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3031 offsetof(CPUX86State,xmm_regs[reg]));
3032 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3033 } else
3034 #endif
3035 {
3036 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3037 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3038 offsetof(CPUX86State,xmm_regs[reg]));
3039 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3040 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3041 }
3042 break;
3043 case 0x6f: /* movq mm, ea */
3044 if (mod != 3) {
3045 gen_lea_modrm(env, s, modrm);
3046 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3047 } else {
3048 rm = (modrm & 7);
3049 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3050 offsetof(CPUX86State,fpregs[rm].mmx));
3051 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3052 offsetof(CPUX86State,fpregs[reg].mmx));
3053 }
3054 break;
3055 case 0x010: /* movups */
3056 case 0x110: /* movupd */
3057 case 0x028: /* movaps */
3058 case 0x128: /* movapd */
3059 case 0x16f: /* movdqa xmm, ea */
3060 case 0x26f: /* movdqu xmm, ea */
3061 if (mod != 3) {
3062 gen_lea_modrm(env, s, modrm);
3063 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3064 } else {
3065 rm = (modrm & 7) | REX_B(s);
3066 gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3067 offsetof(CPUX86State,xmm_regs[rm]));
3068 }
3069 break;
3070 case 0x210: /* movss xmm, ea */
3071 if (mod != 3) {
3072 gen_lea_modrm(env, s, modrm);
3073 gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3074 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3075 tcg_gen_movi_tl(cpu_T0, 0);
3076 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3077 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3078 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3079 } else {
3080 rm = (modrm & 7) | REX_B(s);
3081 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3082 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3083 }
3084 break;
3085 case 0x310: /* movsd xmm, ea */
3086 if (mod != 3) {
3087 gen_lea_modrm(env, s, modrm);
3088 gen_ldq_env_A0(s, offsetof(CPUX86State,
3089 xmm_regs[reg].ZMM_Q(0)));
3090 tcg_gen_movi_tl(cpu_T0, 0);
3091 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3092 tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3093 } else {
3094 rm = (modrm & 7) | REX_B(s);
3095 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3096 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3097 }
3098 break;
3099 case 0x012: /* movlps */
3100 case 0x112: /* movlpd */
3101 if (mod != 3) {
3102 gen_lea_modrm(env, s, modrm);
3103 gen_ldq_env_A0(s, offsetof(CPUX86State,
3104 xmm_regs[reg].ZMM_Q(0)));
3105 } else {
3106 /* movhlps */
3107 rm = (modrm & 7) | REX_B(s);
3108 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3109 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3110 }
3111 break;
3112 case 0x212: /* movsldup */
3113 if (mod != 3) {
3114 gen_lea_modrm(env, s, modrm);
3115 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3116 } else {
3117 rm = (modrm & 7) | REX_B(s);
3118 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3119 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3120 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3121 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3122 }
3123 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3124 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3125 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3126 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3127 break;
3128 case 0x312: /* movddup */
3129 if (mod != 3) {
3130 gen_lea_modrm(env, s, modrm);
3131 gen_ldq_env_A0(s, offsetof(CPUX86State,
3132 xmm_regs[reg].ZMM_Q(0)));
3133 } else {
3134 rm = (modrm & 7) | REX_B(s);
3135 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3136 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3137 }
3138 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3139 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3140 break;
3141 case 0x016: /* movhps */
3142 case 0x116: /* movhpd */
3143 if (mod != 3) {
3144 gen_lea_modrm(env, s, modrm);
3145 gen_ldq_env_A0(s, offsetof(CPUX86State,
3146 xmm_regs[reg].ZMM_Q(1)));
3147 } else {
3148 /* movlhps */
3149 rm = (modrm & 7) | REX_B(s);
3150 gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3151 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3152 }
3153 break;
3154 case 0x216: /* movshdup */
3155 if (mod != 3) {
3156 gen_lea_modrm(env, s, modrm);
3157 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3158 } else {
3159 rm = (modrm & 7) | REX_B(s);
3160 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3161 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3162 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3163 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3164 }
3165 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3166 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3167 gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),