1 /*
2 * i386 translation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32
33 #include "trace-tcg.h"
34 #include "exec/log.h"
35
36 #define PREFIX_REPZ 0x01
37 #define PREFIX_REPNZ 0x02
38 #define PREFIX_LOCK 0x04
39 #define PREFIX_DATA 0x08
40 #define PREFIX_ADR 0x10
41 #define PREFIX_VEX 0x20
42 #define PREFIX_REX 0x40
43
44 #ifdef TARGET_X86_64
45 # define ctztl ctz64
46 # define clztl clz64
47 #else
48 # define ctztl ctz32
49 # define clztl clz32
50 #endif
51
52 /* For a switch indexed by MODRM, match all memory operands for a given OP. */
53 #define CASE_MODRM_MEM_OP(OP) \
54 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
55 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
56 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
57
58 #define CASE_MODRM_OP(OP) \
59 case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
60 case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
61 case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
62 case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
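/*
 * For example, CASE_MODRM_MEM_OP(4) expands to the case ranges
 * 0x20-0x27, 0x60-0x67 and 0xa0-0xa7, i.e. every modrm byte whose reg
 * field is 4 and whose mod field selects a memory operand (mod != 3);
 * CASE_MODRM_OP(4) additionally matches 0xe0-0xe7 (mod == 3, register
 * operands).
 */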
63
64 //#define MACRO_TEST 1
65
66 /* global register indexes */
67 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
68 static TCGv_i32 cpu_cc_op;
69 static TCGv cpu_regs[CPU_NB_REGS];
70 static TCGv cpu_seg_base[6];
71 static TCGv_i64 cpu_bndl[4];
72 static TCGv_i64 cpu_bndu[4];
73
74 #include "exec/gen-icount.h"
75
76 typedef struct DisasContext {
77 DisasContextBase base;
78
79 target_ulong pc; /* pc = eip + cs_base */
80 target_ulong pc_start; /* pc at start of the current instruction */
81 target_ulong cs_base; /* base of CS segment */
82
83 MemOp aflag;
84 MemOp dflag;
85
86 int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
87 uint8_t prefix;
88
89 #ifndef CONFIG_USER_ONLY
90 uint8_t cpl; /* code priv level */
91 uint8_t iopl; /* i/o priv level */
92 #endif
93 uint8_t vex_l; /* vex vector length */
94 uint8_t vex_v; /* vex vvvv register, without 1's complement. */
95 uint8_t popl_esp_hack; /* for correct popl with esp base handling */
96 uint8_t rip_offset; /* only used in x86_64, but left for simplicity */
97
98 #ifdef TARGET_X86_64
99 uint8_t rex_r;
100 uint8_t rex_x;
101 uint8_t rex_b;
102 bool rex_w;
103 #endif
104 bool jmp_opt; /* use direct block chaining for direct jumps */
105 bool repz_opt; /* optimize jumps within repz instructions */
106 bool cc_op_dirty;
107
108 CCOp cc_op; /* current CC operation */
109 int mem_index; /* select memory access functions */
110 uint32_t flags; /* all execution flags */
111 int cpuid_features;
112 int cpuid_ext_features;
113 int cpuid_ext2_features;
114 int cpuid_ext3_features;
115 int cpuid_7_0_ebx_features;
116 int cpuid_xsave_features;
117
118 /* TCG local temps */
119 TCGv cc_srcT;
120 TCGv A0;
121 TCGv T0;
122 TCGv T1;
123
124 /* TCG local register indexes (only used inside old micro ops) */
125 TCGv tmp0;
126 TCGv tmp4;
127 TCGv_ptr ptr0;
128 TCGv_ptr ptr1;
129 TCGv_i32 tmp2_i32;
130 TCGv_i32 tmp3_i32;
131 TCGv_i64 tmp1_i64;
132
133 sigjmp_buf jmpbuf;
134 } DisasContext;
135
136 /* The environment in which user-only runs is constrained. */
137 #ifdef CONFIG_USER_ONLY
138 #define PE(S) true
139 #define CPL(S) 3
140 #define IOPL(S) 0
141 #define SVME(S) false
142 #define GUEST(S) false
143 #else
144 #define PE(S) (((S)->flags & HF_PE_MASK) != 0)
145 #define CPL(S) ((S)->cpl)
146 #define IOPL(S) ((S)->iopl)
147 #define SVME(S) (((S)->flags & HF_SVME_MASK) != 0)
148 #define GUEST(S) (((S)->flags & HF_GUEST_MASK) != 0)
149 #endif
150 #if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
151 #define VM86(S) false
152 #define CODE32(S) true
153 #define SS32(S) true
154 #define ADDSEG(S) false
155 #else
156 #define VM86(S) (((S)->flags & HF_VM_MASK) != 0)
157 #define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
158 #define SS32(S) (((S)->flags & HF_SS32_MASK) != 0)
159 #define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
160 #endif
161 #if !defined(TARGET_X86_64)
162 #define CODE64(S) false
163 #define LMA(S) false
164 #elif defined(CONFIG_USER_ONLY)
165 #define CODE64(S) true
166 #define LMA(S) true
167 #else
168 #define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
169 #define LMA(S) (((S)->flags & HF_LMA_MASK) != 0)
170 #endif
171
172 #ifdef TARGET_X86_64
173 #define REX_PREFIX(S) (((S)->prefix & PREFIX_REX) != 0)
174 #define REX_W(S) ((S)->rex_w)
175 #define REX_R(S) ((S)->rex_r + 0)
176 #define REX_X(S) ((S)->rex_x + 0)
177 #define REX_B(S) ((S)->rex_b + 0)
178 #else
179 #define REX_PREFIX(S) false
180 #define REX_W(S) false
181 #define REX_R(S) 0
182 #define REX_X(S) 0
183 #define REX_B(S) 0
184 #endif
185
186 /*
187 * Many sysemu-only helpers are not reachable for user-only.
188 * Define stub generators here, so that we need neither sprinkle
189 * ifdefs through the translator nor provide the helper function.
190 */
191 #define STUB_HELPER(NAME, ...) \
192 static inline void gen_helper_##NAME(__VA_ARGS__) \
193 { qemu_build_not_reached(); }
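/*
 * For example, the first stub below expands to
 *     static inline void gen_helper_clgi(TCGv_env env)
 *     { qemu_build_not_reached(); }
 * so a user-only build fails (at build time, provided the call is
 * optimized away as unreachable) rather than requiring a real helper.
 */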
194
195 #ifdef CONFIG_USER_ONLY
196 STUB_HELPER(clgi, TCGv_env env)
197 STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
198 STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
199 STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
200 STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
201 STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
202 STUB_HELPER(monitor, TCGv_env env, TCGv addr)
203 STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
204 STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
205 STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
206 STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
207 STUB_HELPER(rdmsr, TCGv_env env)
208 STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
209 STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
210 STUB_HELPER(stgi, TCGv_env env)
211 STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
212 STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
213 STUB_HELPER(vmmcall, TCGv_env env)
214 STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
215 STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
216 STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
217 STUB_HELPER(wrmsr, TCGv_env env)
218 #endif
219
220 static void gen_eob(DisasContext *s);
221 static void gen_jr(DisasContext *s, TCGv dest);
222 static void gen_jmp(DisasContext *s, target_ulong eip);
223 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
224 static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
225 static void gen_exception_gpf(DisasContext *s);
226
227 /* i386 arith/logic operations */
228 enum {
229 OP_ADDL,
230 OP_ORL,
231 OP_ADCL,
232 OP_SBBL,
233 OP_ANDL,
234 OP_SUBL,
235 OP_XORL,
236 OP_CMPL,
237 };
238
239 /* i386 shift ops */
240 enum {
241 OP_ROL,
242 OP_ROR,
243 OP_RCL,
244 OP_RCR,
245 OP_SHL,
246 OP_SHR,
247 OP_SHL1, /* undocumented */
248 OP_SAR = 7,
249 };
250
251 enum {
252 JCC_O,
253 JCC_B,
254 JCC_Z,
255 JCC_BE,
256 JCC_S,
257 JCC_P,
258 JCC_L,
259 JCC_LE,
260 };
261
262 enum {
263 /* I386 int registers */
264 OR_EAX, /* MUST be even numbered */
265 OR_ECX,
266 OR_EDX,
267 OR_EBX,
268 OR_ESP,
269 OR_EBP,
270 OR_ESI,
271 OR_EDI,
272
273 OR_TMP0 = 16, /* temporary operand register */
274 OR_TMP1,
275 OR_A0, /* temporary register used when doing address evaluation */
276 };
277
278 enum {
279 USES_CC_DST = 1,
280 USES_CC_SRC = 2,
281 USES_CC_SRC2 = 4,
282 USES_CC_SRCT = 8,
283 };
284
285 /* Bit set if the global variable is live after setting CC_OP to X. */
286 static const uint8_t cc_op_live[CC_OP_NB] = {
287 [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
288 [CC_OP_EFLAGS] = USES_CC_SRC,
289 [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
290 [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
291 [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
292 [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
293 [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
294 [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
295 [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
296 [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
297 [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
298 [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
299 [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
300 [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
301 [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
302 [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
303 [CC_OP_CLR] = 0,
304 [CC_OP_POPCNT] = USES_CC_SRC,
305 };
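/*
 * set_cc_op() below consults this table: any global that was live for the
 * old CC_OP but is not live for the new one is discarded.  Note that only
 * the SUB group keeps cc_srcT, because the carry/borrow for SUB is
 * recomputed from the saved left-hand operand (see gen_prepare_eflags_c).
 */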
306
307 static void set_cc_op(DisasContext *s, CCOp op)
308 {
309 int dead;
310
311 if (s->cc_op == op) {
312 return;
313 }
314
315 /* Discard CC computation that will no longer be used. */
316 dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
317 if (dead & USES_CC_DST) {
318 tcg_gen_discard_tl(cpu_cc_dst);
319 }
320 if (dead & USES_CC_SRC) {
321 tcg_gen_discard_tl(cpu_cc_src);
322 }
323 if (dead & USES_CC_SRC2) {
324 tcg_gen_discard_tl(cpu_cc_src2);
325 }
326 if (dead & USES_CC_SRCT) {
327 tcg_gen_discard_tl(s->cc_srcT);
328 }
329
330 if (op == CC_OP_DYNAMIC) {
331 /* The DYNAMIC setting is translator only, and should never be
332 stored. Thus we always consider it clean. */
333 s->cc_op_dirty = false;
334 } else {
335 /* Discard any computed CC_OP value (see shifts). */
336 if (s->cc_op == CC_OP_DYNAMIC) {
337 tcg_gen_discard_i32(cpu_cc_op);
338 }
339 s->cc_op_dirty = true;
340 }
341 s->cc_op = op;
342 }
343
344 static void gen_update_cc_op(DisasContext *s)
345 {
346 if (s->cc_op_dirty) {
347 tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
348 s->cc_op_dirty = false;
349 }
350 }
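/*
 * The two functions above implement the lazy condition-code scheme:
 * s->cc_op tracks how the flags can be recomputed from cc_dst/cc_src/
 * cc_src2, and the value is written back to the cpu_cc_op global only
 * when gen_update_cc_op() is called, i.e. when a helper or the end of
 * the TB actually needs it.
 */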
351
352 #ifdef TARGET_X86_64
353
354 #define NB_OP_SIZES 4
355
356 #else /* !TARGET_X86_64 */
357
358 #define NB_OP_SIZES 3
359
360 #endif /* !TARGET_X86_64 */
361
362 #if defined(HOST_WORDS_BIGENDIAN)
363 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
364 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
365 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
366 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
367 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
368 #else
369 #define REG_B_OFFSET 0
370 #define REG_H_OFFSET 1
371 #define REG_W_OFFSET 0
372 #define REG_L_OFFSET 0
373 #define REG_LH_OFFSET 4
374 #endif
375
376 /* In instruction encodings for byte register accesses the
377 * register number usually indicates "low 8 bits of register N";
378 * however there are some special cases where N 4..7 indicates
379 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
380 * true for this special case, false otherwise.
381 */
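/* For example, with no REX prefix a byte-sized reg field of 4 selects AH,
   while with any REX prefix it selects SPL (the low byte of ESP/RSP). */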
382 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
383 {
384 /* Any time the REX prefix is present, byte registers are uniform */
385 if (reg < 4 || REX_PREFIX(s)) {
386 return false;
387 }
388 return true;
389 }
390
391 /* Select the size of a push/pop operation. */
392 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
393 {
394 if (CODE64(s)) {
395 return ot == MO_16 ? MO_16 : MO_64;
396 } else {
397 return ot;
398 }
399 }
400
401 /* Select the size of the stack pointer. */
402 static inline MemOp mo_stacksize(DisasContext *s)
403 {
404 return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
405 }
406
407 /* Select MO_64 if OT is 64-bit, else MO_32. Used for SSE operand sizes. */
408 static inline MemOp mo_64_32(MemOp ot)
409 {
410 #ifdef TARGET_X86_64
411 return ot == MO_64 ? MO_64 : MO_32;
412 #else
413 return MO_32;
414 #endif
415 }
416
417 /* Select size 8 if lsb of B is clear, else OT. Used for decoding
418 byte vs word opcodes. */
419 static inline MemOp mo_b_d(int b, MemOp ot)
420 {
421 return b & 1 ? ot : MO_8;
422 }
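/* For example, the MOV r/m,reg pair 0x88/0x89 differs only in bit 0:
   mo_b_d(0x88, ot) yields MO_8 while mo_b_d(0x89, ot) yields ot. */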
423
424 /* Select size 8 if lsb of B is clear, else OT capped at 32.
425 Used for decoding operand size of port opcodes. */
426 static inline MemOp mo_b_d32(int b, MemOp ot)
427 {
428 return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
429 }
430
431 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
432 {
433 switch(ot) {
434 case MO_8:
435 if (!byte_reg_is_xH(s, reg)) {
436 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
437 } else {
438 tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
439 }
440 break;
441 case MO_16:
442 tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
443 break;
444 case MO_32:
445 /* For x86_64, this sets the upper half of the register to zero.
446 For i386, this is equivalent to a mov. */
447 tcg_gen_ext32u_tl(cpu_regs[reg], t0);
448 break;
449 #ifdef TARGET_X86_64
450 case MO_64:
451 tcg_gen_mov_tl(cpu_regs[reg], t0);
452 break;
453 #endif
454 default:
455 tcg_abort();
456 }
457 }
458
459 static inline
460 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
461 {
462 if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
463 tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
464 } else {
465 tcg_gen_mov_tl(t0, cpu_regs[reg]);
466 }
467 }
468
469 static void gen_add_A0_im(DisasContext *s, int val)
470 {
471 tcg_gen_addi_tl(s->A0, s->A0, val);
472 if (!CODE64(s)) {
473 tcg_gen_ext32u_tl(s->A0, s->A0);
474 }
475 }
476
477 static inline void gen_op_jmp_v(TCGv dest)
478 {
479 tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
480 }
481
482 static inline
483 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
484 {
485 tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
486 gen_op_mov_reg_v(s, size, reg, s->tmp0);
487 }
488
489 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
490 {
491 tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
492 gen_op_mov_reg_v(s, size, reg, s->tmp0);
493 }
494
495 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
496 {
497 tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
498 }
499
500 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
501 {
502 tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
503 }
504
505 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
506 {
507 if (d == OR_TMP0) {
508 gen_op_st_v(s, idx, s->T0, s->A0);
509 } else {
510 gen_op_mov_reg_v(s, idx, d, s->T0);
511 }
512 }
513
514 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
515 {
516 tcg_gen_movi_tl(s->tmp0, pc);
517 gen_op_jmp_v(s->tmp0);
518 }
519
520 /* Compute SEG:REG into A0. SEG is selected from the override segment
521 (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to
522 indicate no override. */
523 static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
524 int def_seg, int ovr_seg)
525 {
526 switch (aflag) {
527 #ifdef TARGET_X86_64
528 case MO_64:
529 if (ovr_seg < 0) {
530 tcg_gen_mov_tl(s->A0, a0);
531 return;
532 }
533 break;
534 #endif
535 case MO_32:
536 /* 32 bit address */
537 if (ovr_seg < 0 && ADDSEG(s)) {
538 ovr_seg = def_seg;
539 }
540 if (ovr_seg < 0) {
541 tcg_gen_ext32u_tl(s->A0, a0);
542 return;
543 }
544 break;
545 case MO_16:
546 /* 16 bit address */
547 tcg_gen_ext16u_tl(s->A0, a0);
548 a0 = s->A0;
549 if (ovr_seg < 0) {
550 if (ADDSEG(s)) {
551 ovr_seg = def_seg;
552 } else {
553 return;
554 }
555 }
556 break;
557 default:
558 tcg_abort();
559 }
560
561 if (ovr_seg >= 0) {
562 TCGv seg = cpu_seg_base[ovr_seg];
563
564 if (aflag == MO_64) {
565 tcg_gen_add_tl(s->A0, a0, seg);
566 } else if (CODE64(s)) {
567 tcg_gen_ext32u_tl(s->A0, a0);
568 tcg_gen_add_tl(s->A0, s->A0, seg);
569 } else {
570 tcg_gen_add_tl(s->A0, a0, seg);
571 tcg_gen_ext32u_tl(s->A0, s->A0);
572 }
573 }
574 }
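/*
 * Note the three shapes above: a 64-bit address is used unchanged, a
 * 32-bit address is zero-extended (a segment base is added only when
 * ADDSEG is set or an explicit override is present), and a 16-bit
 * address is always truncated to 16 bits before any base is applied.
 */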
575
576 static inline void gen_string_movl_A0_ESI(DisasContext *s)
577 {
578 gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
579 }
580
581 static inline void gen_string_movl_A0_EDI(DisasContext *s)
582 {
583 gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
584 }
585
586 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
587 {
588 tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
589 tcg_gen_shli_tl(s->T0, s->T0, ot);
590 }
591
592 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
593 {
594 switch (size) {
595 case MO_8:
596 if (sign) {
597 tcg_gen_ext8s_tl(dst, src);
598 } else {
599 tcg_gen_ext8u_tl(dst, src);
600 }
601 return dst;
602 case MO_16:
603 if (sign) {
604 tcg_gen_ext16s_tl(dst, src);
605 } else {
606 tcg_gen_ext16u_tl(dst, src);
607 }
608 return dst;
609 #ifdef TARGET_X86_64
610 case MO_32:
611 if (sign) {
612 tcg_gen_ext32s_tl(dst, src);
613 } else {
614 tcg_gen_ext32u_tl(dst, src);
615 }
616 return dst;
617 #endif
618 default:
619 return src;
620 }
621 }
622
623 static void gen_extu(MemOp ot, TCGv reg)
624 {
625 gen_ext_tl(reg, reg, ot, false);
626 }
627
628 static void gen_exts(MemOp ot, TCGv reg)
629 {
630 gen_ext_tl(reg, reg, ot, true);
631 }
632
633 static inline
634 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
635 {
636 tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
637 gen_extu(size, s->tmp0);
638 tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
639 }
640
641 static inline
642 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
643 {
644 tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
645 gen_extu(size, s->tmp0);
646 tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
647 }
648
649 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
650 {
651 switch (ot) {
652 case MO_8:
653 gen_helper_inb(v, cpu_env, n);
654 break;
655 case MO_16:
656 gen_helper_inw(v, cpu_env, n);
657 break;
658 case MO_32:
659 gen_helper_inl(v, cpu_env, n);
660 break;
661 default:
662 tcg_abort();
663 }
664 }
665
666 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
667 {
668 switch (ot) {
669 case MO_8:
670 gen_helper_outb(cpu_env, v, n);
671 break;
672 case MO_16:
673 gen_helper_outw(cpu_env, v, n);
674 break;
675 case MO_32:
676 gen_helper_outl(cpu_env, v, n);
677 break;
678 default:
679 tcg_abort();
680 }
681 }
682
683 /*
684 * Validate that access to [port, port + 1<<ot) is allowed.
685 * If not, raise #GP or generate a VMM exit.
686 */
687 static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
688 uint32_t svm_flags)
689 {
690 #ifdef CONFIG_USER_ONLY
691 /*
692 * We do not implement the ioperm(2) syscall, so the TSS check
693 * will always fail.
694 */
695 gen_exception_gpf(s);
696 return false;
697 #else
698 if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
699 gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
700 }
701 if (GUEST(s)) {
702 target_ulong cur_eip = s->base.pc_next - s->cs_base;
703 target_ulong next_eip = s->pc - s->cs_base;
704
705 gen_update_cc_op(s);
706 gen_jmp_im(s, cur_eip);
707 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
708 svm_flags |= SVM_IOIO_REP_MASK;
709 }
710 svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
711 gen_helper_svm_check_io(cpu_env, port,
712 tcg_constant_i32(svm_flags),
713 tcg_constant_i32(next_eip - cur_eip));
714 }
715 return true;
716 #endif
717 }
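/*
 * In summary: user-only always raises #GP since ioperm(2) is not
 * implemented; in system mode the TSS permission bitmap is consulted
 * only when CPL > IOPL or in vm86 mode, and an SVM IOIO intercept
 * check is additionally emitted when the vcpu is running inside a guest.
 */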
718
719 static inline void gen_movs(DisasContext *s, MemOp ot)
720 {
721 gen_string_movl_A0_ESI(s);
722 gen_op_ld_v(s, ot, s->T0, s->A0);
723 gen_string_movl_A0_EDI(s);
724 gen_op_st_v(s, ot, s->T0, s->A0);
725 gen_op_movl_T0_Dshift(s, ot);
726 gen_op_add_reg_T0(s, s->aflag, R_ESI);
727 gen_op_add_reg_T0(s, s->aflag, R_EDI);
728 }
729
730 static void gen_op_update1_cc(DisasContext *s)
731 {
732 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
733 }
734
735 static void gen_op_update2_cc(DisasContext *s)
736 {
737 tcg_gen_mov_tl(cpu_cc_src, s->T1);
738 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
739 }
740
741 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
742 {
743 tcg_gen_mov_tl(cpu_cc_src2, reg);
744 tcg_gen_mov_tl(cpu_cc_src, s->T1);
745 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
746 }
747
748 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
749 {
750 tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
751 }
752
753 static void gen_op_update_neg_cc(DisasContext *s)
754 {
755 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
756 tcg_gen_neg_tl(cpu_cc_src, s->T0);
757 tcg_gen_movi_tl(s->cc_srcT, 0);
758 }
759
760 /* compute all eflags to cc_src */
761 static void gen_compute_eflags(DisasContext *s)
762 {
763 TCGv zero, dst, src1, src2;
764 int live, dead;
765
766 if (s->cc_op == CC_OP_EFLAGS) {
767 return;
768 }
769 if (s->cc_op == CC_OP_CLR) {
770 tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
771 set_cc_op(s, CC_OP_EFLAGS);
772 return;
773 }
774
775 zero = NULL;
776 dst = cpu_cc_dst;
777 src1 = cpu_cc_src;
778 src2 = cpu_cc_src2;
779
780 /* Take care to not read values that are not live. */
781 live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
782 dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
783 if (dead) {
784 zero = tcg_const_tl(0);
785 if (dead & USES_CC_DST) {
786 dst = zero;
787 }
788 if (dead & USES_CC_SRC) {
789 src1 = zero;
790 }
791 if (dead & USES_CC_SRC2) {
792 src2 = zero;
793 }
794 }
795
796 gen_update_cc_op(s);
797 gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
798 set_cc_op(s, CC_OP_EFLAGS);
799
800 if (dead) {
801 tcg_temp_free(zero);
802 }
803 }
804
805 typedef struct CCPrepare {
806 TCGCond cond;
807 TCGv reg;
808 TCGv reg2;
809 target_ulong imm;
810 target_ulong mask;
811 bool use_reg2;
812 bool no_setcond;
813 } CCPrepare;
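/*
 * A CCPrepare describes how to materialise a condition: compare 'reg'
 * against 'reg2' (if use_reg2) or the immediate 'imm' using 'cond'.
 * If 'mask' is not -1 the consumer first ANDs 'reg' with it; if
 * 'no_setcond' is set, 'reg' already holds the 0/1 flag value and only
 * needs to be copied or inverted (see gen_setcc1).
 */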
814
815 /* compute eflags.C to reg */
816 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
817 {
818 TCGv t0, t1;
819 int size, shift;
820
821 switch (s->cc_op) {
822 case CC_OP_SUBB ... CC_OP_SUBQ:
823 /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
824 size = s->cc_op - CC_OP_SUBB;
825 t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
826 /* If no temporary was used, be careful not to alias t1 and t0. */
827 t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
828 tcg_gen_mov_tl(t0, s->cc_srcT);
829 gen_extu(size, t0);
830 goto add_sub;
831
832 case CC_OP_ADDB ... CC_OP_ADDQ:
833 /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
834 size = s->cc_op - CC_OP_ADDB;
835 t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
836 t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
837 add_sub:
838 return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
839 .reg2 = t1, .mask = -1, .use_reg2 = true };
840
841 case CC_OP_LOGICB ... CC_OP_LOGICQ:
842 case CC_OP_CLR:
843 case CC_OP_POPCNT:
844 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
845
846 case CC_OP_INCB ... CC_OP_INCQ:
847 case CC_OP_DECB ... CC_OP_DECQ:
848 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
849 .mask = -1, .no_setcond = true };
850
851 case CC_OP_SHLB ... CC_OP_SHLQ:
852 /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
853 size = s->cc_op - CC_OP_SHLB;
854 shift = (8 << size) - 1;
855 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
856 .mask = (target_ulong)1 << shift };
857
858 case CC_OP_MULB ... CC_OP_MULQ:
859 return (CCPrepare) { .cond = TCG_COND_NE,
860 .reg = cpu_cc_src, .mask = -1 };
861
862 case CC_OP_BMILGB ... CC_OP_BMILGQ:
863 size = s->cc_op - CC_OP_BMILGB;
864 t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
865 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
866
867 case CC_OP_ADCX:
868 case CC_OP_ADCOX:
869 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
870 .mask = -1, .no_setcond = true };
871
872 case CC_OP_EFLAGS:
873 case CC_OP_SARB ... CC_OP_SARQ:
874 /* CC_SRC & 1 */
875 return (CCPrepare) { .cond = TCG_COND_NE,
876 .reg = cpu_cc_src, .mask = CC_C };
877
878 default:
879 /* The need to compute only C from CC_OP_DYNAMIC is important
880 in efficiently implementing e.g. INC at the start of a TB. */
881 gen_update_cc_op(s);
882 gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
883 cpu_cc_src2, cpu_cc_op);
884 return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
885 .mask = -1, .no_setcond = true };
886 }
887 }
888
889 /* compute eflags.P to reg */
890 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
891 {
892 gen_compute_eflags(s);
893 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
894 .mask = CC_P };
895 }
896
897 /* compute eflags.S to reg */
898 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
899 {
900 switch (s->cc_op) {
901 case CC_OP_DYNAMIC:
902 gen_compute_eflags(s);
903 /* FALLTHRU */
904 case CC_OP_EFLAGS:
905 case CC_OP_ADCX:
906 case CC_OP_ADOX:
907 case CC_OP_ADCOX:
908 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
909 .mask = CC_S };
910 case CC_OP_CLR:
911 case CC_OP_POPCNT:
912 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
913 default:
914 {
915 MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
916 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
917 return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
918 }
919 }
920 }
921
922 /* compute eflags.O to reg */
923 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
924 {
925 switch (s->cc_op) {
926 case CC_OP_ADOX:
927 case CC_OP_ADCOX:
928 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
929 .mask = -1, .no_setcond = true };
930 case CC_OP_CLR:
931 case CC_OP_POPCNT:
932 return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
933 default:
934 gen_compute_eflags(s);
935 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
936 .mask = CC_O };
937 }
938 }
939
940 /* compute eflags.Z to reg */
941 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
942 {
943 switch (s->cc_op) {
944 case CC_OP_DYNAMIC:
945 gen_compute_eflags(s);
946 /* FALLTHRU */
947 case CC_OP_EFLAGS:
948 case CC_OP_ADCX:
949 case CC_OP_ADOX:
950 case CC_OP_ADCOX:
951 return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
952 .mask = CC_Z };
953 case CC_OP_CLR:
954 return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
955 case CC_OP_POPCNT:
956 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
957 .mask = -1 };
958 default:
959 {
960 MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
961 TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
962 return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
963 }
964 }
965 }
966
967 /* perform a conditional store into register 'reg' according to jump opcode
968 value 'b'. In the fast case, T0 is guaranteed not to be used. */
969 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
970 {
971 int inv, jcc_op, cond;
972 MemOp size;
973 CCPrepare cc;
974 TCGv t0;
975
976 inv = b & 1;
977 jcc_op = (b >> 1) & 7;
978
979 switch (s->cc_op) {
980 case CC_OP_SUBB ... CC_OP_SUBQ:
981 /* We optimize relational operators for the cmp/jcc case. */
982 size = s->cc_op - CC_OP_SUBB;
983 switch (jcc_op) {
984 case JCC_BE:
985 tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
986 gen_extu(size, s->tmp4);
987 t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
988 cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
989 .reg2 = t0, .mask = -1, .use_reg2 = true };
990 break;
991
992 case JCC_L:
993 cond = TCG_COND_LT;
994 goto fast_jcc_l;
995 case JCC_LE:
996 cond = TCG_COND_LE;
997 fast_jcc_l:
998 tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
999 gen_exts(size, s->tmp4);
1000 t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
1001 cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
1002 .reg2 = t0, .mask = -1, .use_reg2 = true };
1003 break;
1004
1005 default:
1006 goto slow_jcc;
1007 }
1008 break;
1009
1010 default:
1011 slow_jcc:
1012 /* This actually generates good code for JC, JZ and JS. */
1013 switch (jcc_op) {
1014 case JCC_O:
1015 cc = gen_prepare_eflags_o(s, reg);
1016 break;
1017 case JCC_B:
1018 cc = gen_prepare_eflags_c(s, reg);
1019 break;
1020 case JCC_Z:
1021 cc = gen_prepare_eflags_z(s, reg);
1022 break;
1023 case JCC_BE:
1024 gen_compute_eflags(s);
1025 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1026 .mask = CC_Z | CC_C };
1027 break;
1028 case JCC_S:
1029 cc = gen_prepare_eflags_s(s, reg);
1030 break;
1031 case JCC_P:
1032 cc = gen_prepare_eflags_p(s, reg);
1033 break;
1034 case JCC_L:
1035 gen_compute_eflags(s);
1036 if (reg == cpu_cc_src) {
1037 reg = s->tmp0;
1038 }
1039 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1040 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1041 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1042 .mask = CC_S };
1043 break;
1044 default:
1045 case JCC_LE:
1046 gen_compute_eflags(s);
1047 if (reg == cpu_cc_src) {
1048 reg = s->tmp0;
1049 }
1050 tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1051 tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1052 cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1053 .mask = CC_S | CC_Z };
1054 break;
1055 }
1056 break;
1057 }
1058
1059 if (inv) {
1060 cc.cond = tcg_invert_cond(cc.cond);
1061 }
1062 return cc;
1063 }
1064
1065 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1066 {
1067 CCPrepare cc = gen_prepare_cc(s, b, reg);
1068
1069 if (cc.no_setcond) {
1070 if (cc.cond == TCG_COND_EQ) {
1071 tcg_gen_xori_tl(reg, cc.reg, 1);
1072 } else {
1073 tcg_gen_mov_tl(reg, cc.reg);
1074 }
1075 return;
1076 }
1077
1078 if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1079 cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1080 tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1081 tcg_gen_andi_tl(reg, reg, 1);
1082 return;
1083 }
1084 if (cc.mask != -1) {
1085 tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1086 cc.reg = reg;
1087 }
1088 if (cc.use_reg2) {
1089 tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1090 } else {
1091 tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1092 }
1093 }
1094
1095 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1096 {
1097 gen_setcc1(s, JCC_B << 1, reg);
1098 }
1099
1100 /* generate a conditional jump to label 'l1' according to jump opcode
1101 value 'b'. In the fast case, T0 is guaranteed not to be used. */
1102 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1103 {
1104 CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1105
1106 if (cc.mask != -1) {
1107 tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1108 cc.reg = s->T0;
1109 }
1110 if (cc.use_reg2) {
1111 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1112 } else {
1113 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1114 }
1115 }
1116
1117 /* Generate a conditional jump to label 'l1' according to jump opcode
1118 value 'b'. In the fast case, T0 is guaranteed not to be used.
1119 A translation block must end soon. */
1120 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1121 {
1122 CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1123
1124 gen_update_cc_op(s);
1125 if (cc.mask != -1) {
1126 tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1127 cc.reg = s->T0;
1128 }
1129 set_cc_op(s, CC_OP_DYNAMIC);
1130 if (cc.use_reg2) {
1131 tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1132 } else {
1133 tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1134 }
1135 }
1136
1137 /* XXX: does not work with gdbstub "ice" single step - not a
1138 serious problem */
1139 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1140 {
1141 TCGLabel *l1 = gen_new_label();
1142 TCGLabel *l2 = gen_new_label();
1143 gen_op_jnz_ecx(s, s->aflag, l1);
1144 gen_set_label(l2);
1145 gen_jmp_tb(s, next_eip, 1);
1146 gen_set_label(l1);
1147 return l2;
1148 }
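/*
 * The returned label sits just before the jump to next_eip, so callers can
 * branch to it to terminate the string loop; when ECX is non-zero, control
 * skips past that jump and falls into the string operation emitted by the
 * caller.
 */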
1149
1150 static inline void gen_stos(DisasContext *s, MemOp ot)
1151 {
1152 gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1153 gen_string_movl_A0_EDI(s);
1154 gen_op_st_v(s, ot, s->T0, s->A0);
1155 gen_op_movl_T0_Dshift(s, ot);
1156 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1157 }
1158
1159 static inline void gen_lods(DisasContext *s, MemOp ot)
1160 {
1161 gen_string_movl_A0_ESI(s);
1162 gen_op_ld_v(s, ot, s->T0, s->A0);
1163 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1164 gen_op_movl_T0_Dshift(s, ot);
1165 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1166 }
1167
1168 static inline void gen_scas(DisasContext *s, MemOp ot)
1169 {
1170 gen_string_movl_A0_EDI(s);
1171 gen_op_ld_v(s, ot, s->T1, s->A0);
1172 gen_op(s, OP_CMPL, ot, R_EAX);
1173 gen_op_movl_T0_Dshift(s, ot);
1174 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1175 }
1176
1177 static inline void gen_cmps(DisasContext *s, MemOp ot)
1178 {
1179 gen_string_movl_A0_EDI(s);
1180 gen_op_ld_v(s, ot, s->T1, s->A0);
1181 gen_string_movl_A0_ESI(s);
1182 gen_op(s, OP_CMPL, ot, OR_TMP0);
1183 gen_op_movl_T0_Dshift(s, ot);
1184 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1185 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1186 }
1187
1188 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1189 {
1190 if (s->flags & HF_IOBPT_MASK) {
1191 #ifdef CONFIG_USER_ONLY
1192 /* user-mode cpu should not be in IOBPT mode */
1193 g_assert_not_reached();
1194 #else
1195 TCGv_i32 t_size = tcg_const_i32(1 << ot);
1196 TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1197
1198 gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1199 tcg_temp_free_i32(t_size);
1200 tcg_temp_free(t_next);
1201 #endif /* CONFIG_USER_ONLY */
1202 }
1203 }
1204
1205 static inline void gen_ins(DisasContext *s, MemOp ot)
1206 {
1207 gen_string_movl_A0_EDI(s);
1208 /* Note: we must do this dummy write first to be restartable in
1209 case of page fault. */
1210 tcg_gen_movi_tl(s->T0, 0);
1211 gen_op_st_v(s, ot, s->T0, s->A0);
1212 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1213 tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1214 gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1215 gen_op_st_v(s, ot, s->T0, s->A0);
1216 gen_op_movl_T0_Dshift(s, ot);
1217 gen_op_add_reg_T0(s, s->aflag, R_EDI);
1218 gen_bpt_io(s, s->tmp2_i32, ot);
1219 }
1220
1221 static inline void gen_outs(DisasContext *s, MemOp ot)
1222 {
1223 gen_string_movl_A0_ESI(s);
1224 gen_op_ld_v(s, ot, s->T0, s->A0);
1225
1226 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1227 tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1228 tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1229 gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1230 gen_op_movl_T0_Dshift(s, ot);
1231 gen_op_add_reg_T0(s, s->aflag, R_ESI);
1232 gen_bpt_io(s, s->tmp2_i32, ot);
1233 }
1234
1235 /* same method as Valgrind: we generate jumps to the current or next
1236 instruction */
1237 #define GEN_REPZ(op) \
1238 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \
1239 target_ulong cur_eip, target_ulong next_eip) \
1240 { \
1241 TCGLabel *l2; \
1242 gen_update_cc_op(s); \
1243 l2 = gen_jz_ecx_string(s, next_eip); \
1244 gen_ ## op(s, ot); \
1245 gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \
1246 /* a loop would cause two single step exceptions if ECX = 1 \
1247 before rep string_insn */ \
1248 if (s->repz_opt) \
1249 gen_op_jz_ecx(s, s->aflag, l2); \
1250 gen_jmp(s, cur_eip); \
1251 }
1252
1253 #define GEN_REPZ2(op) \
1254 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, \
1255 target_ulong cur_eip, \
1256 target_ulong next_eip, \
1257 int nz) \
1258 { \
1259 TCGLabel *l2; \
1260 gen_update_cc_op(s); \
1261 l2 = gen_jz_ecx_string(s, next_eip); \
1262 gen_ ## op(s, ot); \
1263 gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \
1264 gen_update_cc_op(s); \
1265 gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
1266 if (s->repz_opt) \
1267 gen_op_jz_ecx(s, s->aflag, l2); \
1268 gen_jmp(s, cur_eip); \
1269 }
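/*
 * Each expansion tests ECX up front (gen_jz_ecx_string), performs one
 * iteration of the string operation, decrements ECX, optionally re-tests
 * it (and, for GEN_REPZ2, tests ZF), and finally jumps back to the
 * instruction itself, so every iteration is its own translation block.
 */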
1270
1271 GEN_REPZ(movs)
1272 GEN_REPZ(stos)
1273 GEN_REPZ(lods)
1274 GEN_REPZ(ins)
1275 GEN_REPZ(outs)
1276 GEN_REPZ2(scas)
1277 GEN_REPZ2(cmps)
1278
1279 static void gen_helper_fp_arith_ST0_FT0(int op)
1280 {
1281 switch (op) {
1282 case 0:
1283 gen_helper_fadd_ST0_FT0(cpu_env);
1284 break;
1285 case 1:
1286 gen_helper_fmul_ST0_FT0(cpu_env);
1287 break;
1288 case 2:
1289 gen_helper_fcom_ST0_FT0(cpu_env);
1290 break;
1291 case 3:
1292 gen_helper_fcom_ST0_FT0(cpu_env);
1293 break;
1294 case 4:
1295 gen_helper_fsub_ST0_FT0(cpu_env);
1296 break;
1297 case 5:
1298 gen_helper_fsubr_ST0_FT0(cpu_env);
1299 break;
1300 case 6:
1301 gen_helper_fdiv_ST0_FT0(cpu_env);
1302 break;
1303 case 7:
1304 gen_helper_fdivr_ST0_FT0(cpu_env);
1305 break;
1306 }
1307 }
1308
1309 /* NOTE: the "r" ops are in swapped order here (fsubr/fsub, fdivr/fdiv)
         compared to gen_helper_fp_arith_ST0_FT0 */
1310 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1311 {
1312 TCGv_i32 tmp = tcg_const_i32(opreg);
1313 switch (op) {
1314 case 0:
1315 gen_helper_fadd_STN_ST0(cpu_env, tmp);
1316 break;
1317 case 1:
1318 gen_helper_fmul_STN_ST0(cpu_env, tmp);
1319 break;
1320 case 4:
1321 gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1322 break;
1323 case 5:
1324 gen_helper_fsub_STN_ST0(cpu_env, tmp);
1325 break;
1326 case 6:
1327 gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1328 break;
1329 case 7:
1330 gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1331 break;
1332 }
1333 }
1334
1335 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1336 {
1337 gen_update_cc_op(s);
1338 gen_jmp_im(s, cur_eip);
1339 gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1340 s->base.is_jmp = DISAS_NORETURN;
1341 }
1342
1343 /* Generate #UD for the current instruction. The assumption here is that
1344 the instruction is known, but it isn't allowed in the current cpu mode. */
1345 static void gen_illegal_opcode(DisasContext *s)
1346 {
1347 gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1348 }
1349
1350 /* Generate #GP for the current instruction. */
1351 static void gen_exception_gpf(DisasContext *s)
1352 {
1353 gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
1354 }
1355
1356 /* Check for cpl == 0; if not, raise #GP and return false. */
1357 static bool check_cpl0(DisasContext *s)
1358 {
1359 if (CPL(s) == 0) {
1360 return true;
1361 }
1362 gen_exception_gpf(s);
1363 return false;
1364 }
1365
1366 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1367 static bool check_vm86_iopl(DisasContext *s)
1368 {
1369 if (!VM86(s) || IOPL(s) == 3) {
1370 return true;
1371 }
1372 gen_exception_gpf(s);
1373 return false;
1374 }
1375
1376 /* Check for iopl allowing access; if not, raise #GP and return false. */
1377 static bool check_iopl(DisasContext *s)
1378 {
1379 if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1380 return true;
1381 }
1382 gen_exception_gpf(s);
1383 return false;
1384 }
1385
1386 /* if d == OR_TMP0, it means memory operand (address in A0) */
1387 static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
1388 {
1389 if (d != OR_TMP0) {
1390 if (s1->prefix & PREFIX_LOCK) {
1391 /* Lock prefix when destination is not memory. */
1392 gen_illegal_opcode(s1);
1393 return;
1394 }
1395 gen_op_mov_v_reg(s1, ot, s1->T0, d);
1396 } else if (!(s1->prefix & PREFIX_LOCK)) {
1397 gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1398 }
1399 switch(op) {
1400 case OP_ADCL:
1401 gen_compute_eflags_c(s1, s1->tmp4);
1402 if (s1->prefix & PREFIX_LOCK) {
1403 tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
1404 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1405 s1->mem_index, ot | MO_LE);
1406 } else {
1407 tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1408 tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
1409 gen_op_st_rm_T0_A0(s1, ot, d);
1410 }
1411 gen_op_update3_cc(s1, s1->tmp4);
1412 set_cc_op(s1, CC_OP_ADCB + ot);
1413 break;
1414 case OP_SBBL:
1415 gen_compute_eflags_c(s1, s1->tmp4);
1416 if (s1->prefix & PREFIX_LOCK) {
1417 tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
1418 tcg_gen_neg_tl(s1->T0, s1->T0);
1419 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1420 s1->mem_index, ot | MO_LE);
1421 } else {
1422 tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1423 tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
1424 gen_op_st_rm_T0_A0(s1, ot, d);
1425 }
1426 gen_op_update3_cc(s1, s1->tmp4);
1427 set_cc_op(s1, CC_OP_SBBB + ot);
1428 break;
1429 case OP_ADDL:
1430 if (s1->prefix & PREFIX_LOCK) {
1431 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
1432 s1->mem_index, ot | MO_LE);
1433 } else {
1434 tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1435 gen_op_st_rm_T0_A0(s1, ot, d);
1436 }
1437 gen_op_update2_cc(s1);
1438 set_cc_op(s1, CC_OP_ADDB + ot);
1439 break;
1440 case OP_SUBL:
1441 if (s1->prefix & PREFIX_LOCK) {
1442 tcg_gen_neg_tl(s1->T0, s1->T1);
1443 tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
1444 s1->mem_index, ot | MO_LE);
1445 tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
1446 } else {
1447 tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1448 tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1449 gen_op_st_rm_T0_A0(s1, ot, d);
1450 }
1451 gen_op_update2_cc(s1);
1452 set_cc_op(s1, CC_OP_SUBB + ot);
1453 break;
1454 default:
1455 case OP_ANDL:
1456 if (s1->prefix & PREFIX_LOCK) {
1457 tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
1458 s1->mem_index, ot | MO_LE);
1459 } else {
1460 tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
1461 gen_op_st_rm_T0_A0(s1, ot, d);
1462 }
1463 gen_op_update1_cc(s1);
1464 set_cc_op(s1, CC_OP_LOGICB + ot);
1465 break;
1466 case OP_ORL:
1467 if (s1->prefix & PREFIX_LOCK) {
1468 tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
1469 s1->mem_index, ot | MO_LE);
1470 } else {
1471 tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
1472 gen_op_st_rm_T0_A0(s1, ot, d);
1473 }
1474 gen_op_update1_cc(s1);
1475 set_cc_op(s1, CC_OP_LOGICB + ot);
1476 break;
1477 case OP_XORL:
1478 if (s1->prefix & PREFIX_LOCK) {
1479 tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
1480 s1->mem_index, ot | MO_LE);
1481 } else {
1482 tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
1483 gen_op_st_rm_T0_A0(s1, ot, d);
1484 }
1485 gen_op_update1_cc(s1);
1486 set_cc_op(s1, CC_OP_LOGICB + ot);
1487 break;
1488 case OP_CMPL:
1489 tcg_gen_mov_tl(cpu_cc_src, s1->T1);
1490 tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1491 tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
1492 set_cc_op(s1, CC_OP_SUBB + ot);
1493 break;
1494 }
1495 }
1496
1497 /* if d == OR_TMP0, it means memory operand (address in A0) */
1498 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1499 {
1500 if (s1->prefix & PREFIX_LOCK) {
1501 if (d != OR_TMP0) {
1502 /* Lock prefix when destination is not memory */
1503 gen_illegal_opcode(s1);
1504 return;
1505 }
1506 tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1507 tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1508 s1->mem_index, ot | MO_LE);
1509 } else {
1510 if (d != OR_TMP0) {
1511 gen_op_mov_v_reg(s1, ot, s1->T0, d);
1512 } else {
1513 gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1514 }
1515 tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1516 gen_op_st_rm_T0_A0(s1, ot, d);
1517 }
1518
1519 gen_compute_eflags_c(s1, cpu_cc_src);
1520 tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1521 set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1522 }
1523
1524 static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
1525 TCGv shm1, TCGv count, bool is_right)
1526 {
1527 TCGv_i32 z32, s32, oldop;
1528 TCGv z_tl;
1529
1530 /* Store the results into the CC variables. If we know that the
1531 variable must be dead, store unconditionally. Otherwise we must
1532 take care not to disrupt the current contents. */
1533 z_tl = tcg_const_tl(0);
1534 if (cc_op_live[s->cc_op] & USES_CC_DST) {
1535 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1536 result, cpu_cc_dst);
1537 } else {
1538 tcg_gen_mov_tl(cpu_cc_dst, result);
1539 }
1540 if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1541 tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1542 shm1, cpu_cc_src);
1543 } else {
1544 tcg_gen_mov_tl(cpu_cc_src, shm1);
1545 }
1546 tcg_temp_free(z_tl);
1547
1548 /* Get the two potential CC_OP values into temporaries. */
1549 tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1550 if (s->cc_op == CC_OP_DYNAMIC) {
1551 oldop = cpu_cc_op;
1552 } else {
1553 tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
1554 oldop = s->tmp3_i32;
1555 }
1556
1557 /* Conditionally store the CC_OP value. */
1558 z32 = tcg_const_i32(0);
1559 s32 = tcg_temp_new_i32();
1560 tcg_gen_trunc_tl_i32(s32, count);
1561 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
1562 tcg_temp_free_i32(z32);
1563 tcg_temp_free_i32(s32);
1564
1565 /* The CC_OP value is no longer predictable. */
1566 set_cc_op(s, CC_OP_DYNAMIC);
1567 }
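/*
 * The movconds above are needed because a runtime shift count of zero
 * must leave the flags (and CC_OP) completely untouched; since that
 * cannot be known at translation time, CC_OP ends up DYNAMIC.
 */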
1568
1569 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570 int is_right, int is_arith)
1571 {
1572 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573
1574 /* load */
1575 if (op1 == OR_TMP0) {
1576 gen_op_ld_v(s, ot, s->T0, s->A0);
1577 } else {
1578 gen_op_mov_v_reg(s, ot, s->T0, op1);
1579 }
1580
1581 tcg_gen_andi_tl(s->T1, s->T1, mask);
1582 tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583
1584 if (is_right) {
1585 if (is_arith) {
1586 gen_exts(ot, s->T0);
1587 tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588 tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589 } else {
1590 gen_extu(ot, s->T0);
1591 tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592 tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593 }
1594 } else {
1595 tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596 tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597 }
1598
1599 /* store */
1600 gen_op_st_rm_T0_A0(s, ot, op1);
1601
1602 gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603 }
1604
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606 int is_right, int is_arith)
1607 {
1608 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609
1610 /* load */
1611 if (op1 == OR_TMP0)
1612 gen_op_ld_v(s, ot, s->T0, s->A0);
1613 else
1614 gen_op_mov_v_reg(s, ot, s->T0, op1);
1615
1616 op2 &= mask;
1617 if (op2 != 0) {
1618 if (is_right) {
1619 if (is_arith) {
1620 gen_exts(ot, s->T0);
1621 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623 } else {
1624 gen_extu(ot, s->T0);
1625 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627 }
1628 } else {
1629 tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630 tcg_gen_shli_tl(s->T0, s->T0, op2);
1631 }
1632 }
1633
1634 /* store */
1635 gen_op_st_rm_T0_A0(s, ot, op1);
1636
1637 /* update eflags if non zero shift */
1638 if (op2 != 0) {
1639 tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641 set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642 }
1643 }
1644
1645 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1646 {
1647 target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1648 TCGv_i32 t0, t1;
1649
1650 /* load */
1651 if (op1 == OR_TMP0) {
1652 gen_op_ld_v(s, ot, s->T0, s->A0);
1653 } else {
1654 gen_op_mov_v_reg(s, ot, s->T0, op1);
1655 }
1656
1657 tcg_gen_andi_tl(s->T1, s->T1, mask);
1658
1659 switch (ot) {
1660 case MO_8:
1661 /* Replicate the 8-bit input so that a 32-bit rotate works. */
1662 tcg_gen_ext8u_tl(s->T0, s->T0);
1663 tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1664 goto do_long;
1665 case MO_16:
1666 /* Replicate the 16-bit input so that a 32-bit rotate works. */
1667 tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1668 goto do_long;
1669 do_long:
1670 #ifdef TARGET_X86_64
1671 case MO_32:
1672 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1673 tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1674 if (is_right) {
1675 tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1676 } else {
1677 tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1678 }
1679 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1680 break;
1681 #endif
1682 default:
1683 if (is_right) {
1684 tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1685 } else {
1686 tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1687 }
1688 break;
1689 }
1690
1691 /* store */
1692 gen_op_st_rm_T0_A0(s, ot, op1);
1693
1694 /* We'll need the flags computed into CC_SRC. */
1695 gen_compute_eflags(s);
1696
1697 /* The value that was "rotated out" is now present at the other end
1698 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1699 since we've computed the flags into CC_SRC, these variables are
1700 currently dead. */
1701 if (is_right) {
1702 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1703 tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1704 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1705 } else {
1706 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1707 tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1708 }
1709 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1710 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1711
1712 /* Now conditionally store the new CC_OP value. If the shift count
1713 is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1714 Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
1715 exactly as we computed above. */
1716 t0 = tcg_const_i32(0);
1717 t1 = tcg_temp_new_i32();
1718 tcg_gen_trunc_tl_i32(t1, s->T1);
1719 tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1720 tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1721 tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1722 s->tmp2_i32, s->tmp3_i32);
1723 tcg_temp_free_i32(t0);
1724 tcg_temp_free_i32(t1);
1725
1726 /* The CC_OP value is no longer predictable. */
1727 set_cc_op(s, CC_OP_DYNAMIC);
1728 }
1729
1730 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1731 int is_right)
1732 {
1733 int mask = (ot == MO_64 ? 0x3f : 0x1f);
1734 int shift;
1735
1736 /* load */
1737 if (op1 == OR_TMP0) {
1738 gen_op_ld_v(s, ot, s->T0, s->A0);
1739 } else {
1740 gen_op_mov_v_reg(s, ot, s->T0, op1);
1741 }
1742
1743 op2 &= mask;
1744 if (op2 != 0) {
1745 switch (ot) {
1746 #ifdef TARGET_X86_64
1747 case MO_32:
1748 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1749 if (is_right) {
1750 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1751 } else {
1752 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1753 }
1754 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1755 break;
1756 #endif
1757 default:
1758 if (is_right) {
1759 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1760 } else {
1761 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1762 }
1763 break;
1764 case MO_8:
1765 mask = 7;
1766 goto do_shifts;
1767 case MO_16:
1768 mask = 15;
1769 do_shifts:
1770 shift = op2 & mask;
1771 if (is_right) {
1772 shift = mask + 1 - shift;
1773 }
1774 gen_extu(ot, s->T0);
1775 tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1776 tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1777 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1778 break;
1779 }
1780 }
1781
1782 /* store */
1783 gen_op_st_rm_T0_A0(s, ot, op1);
1784
1785 if (op2 != 0) {
1786 /* Compute the flags into CC_SRC. */
1787 gen_compute_eflags(s);
1788
1789 /* The value that was "rotated out" is now present at the other end
1790 of the word. Compute C into CC_DST and O into CC_SRC2. Note that
1791 since we've computed the flags into CC_SRC, these variables are
1792 currently dead. */
1793 if (is_right) {
1794 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1795 tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1796 tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1797 } else {
1798 tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1799 tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1800 }
1801 tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1802 tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1803 set_cc_op(s, CC_OP_ADCOX);
1804 }
1805 }
1806
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809 int is_right)
1810 {
1811 gen_compute_eflags(s);
1812 assert(s->cc_op == CC_OP_EFLAGS);
1813
1814 /* load */
1815 if (op1 == OR_TMP0)
1816 gen_op_ld_v(s, ot, s->T0, s->A0);
1817 else
1818 gen_op_mov_v_reg(s, ot, s->T0, op1);
1819
1820 if (is_right) {
1821 switch (ot) {
1822 case MO_8:
1823 gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824 break;
1825 case MO_16:
1826 gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827 break;
1828 case MO_32:
1829 gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830 break;
1831 #ifdef TARGET_X86_64
1832 case MO_64:
1833 gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834 break;
1835 #endif
1836 default:
1837 tcg_abort();
1838 }
1839 } else {
1840 switch (ot) {
1841 case MO_8:
1842 gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843 break;
1844 case MO_16:
1845 gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846 break;
1847 case MO_32:
1848 gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849 break;
1850 #ifdef TARGET_X86_64
1851 case MO_64:
1852 gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853 break;
1854 #endif
1855 default:
1856 tcg_abort();
1857 }
1858 }
1859 /* store */
1860 gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865 bool is_right, TCGv count_in)
1866 {
1867 target_ulong mask = (ot == MO_64 ? 63 : 31);
1868 TCGv count;
1869
1870 /* load */
1871 if (op1 == OR_TMP0) {
1872 gen_op_ld_v(s, ot, s->T0, s->A0);
1873 } else {
1874 gen_op_mov_v_reg(s, ot, s->T0, op1);
1875 }
1876
1877 count = tcg_temp_new();
1878 tcg_gen_andi_tl(count, count_in, mask);
1879
1880 switch (ot) {
1881 case MO_16:
1882 /* Note: we implement the Intel behaviour for shift count > 16.
1883 This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
1884 portion by constructing it as a 32-bit value. */
1885 if (is_right) {
1886 tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887 tcg_gen_mov_tl(s->T1, s->T0);
1888 tcg_gen_mov_tl(s->T0, s->tmp0);
1889 } else {
1890 tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891 }
1892 /*
1893 * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1894 * otherwise fall through to the default case.
1895 */
1896 case MO_32:
1897 #ifdef TARGET_X86_64
1898 /* Concatenate the two 32-bit values and use a 64-bit shift. */
1899 tcg_gen_subi_tl(s->tmp0, count, 1);
1900 if (is_right) {
1901 tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902 tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903 tcg_gen_shr_i64(s->T0, s->T0, count);
1904 } else {
1905 tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906 tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907 tcg_gen_shl_i64(s->T0, s->T0, count);
1908 tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909 tcg_gen_shri_i64(s->T0, s->T0, 32);
1910 }
1911 break;
1912 #endif
1913 default:
1914 tcg_gen_subi_tl(s->tmp0, count, 1);
1915 if (is_right) {
1916 tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917
1918 tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919 tcg_gen_shr_tl(s->T0, s->T0, count);
1920 tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921 } else {
1922 tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923 if (ot == MO_16) {
1924 /* Only needed if count > 16, for Intel behaviour. */
1925 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928 }
1929
1930 tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931 tcg_gen_shl_tl(s->T0, s->T0, count);
1932 tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933 }
1934 tcg_gen_movi_tl(s->tmp4, 0);
1935 tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936 s->tmp4, s->T1);
1937 tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938 break;
1939 }
1940
1941 /* store */
1942 gen_op_st_rm_T0_A0(s, ot, op1);
1943
1944 gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945 tcg_temp_free(count);
1946 }
1947
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950 if (s != OR_TMP1)
1951 gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952 switch(op) {
1953 case OP_ROL:
1954 gen_rot_rm_T1(s1, ot, d, 0);
1955 break;
1956 case OP_ROR:
1957 gen_rot_rm_T1(s1, ot, d, 1);
1958 break;
1959 case OP_SHL:
1960 case OP_SHL1:
1961 gen_shift_rm_T1(s1, ot, d, 0, 0);
1962 break;
1963 case OP_SHR:
1964 gen_shift_rm_T1(s1, ot, d, 1, 0);
1965 break;
1966 case OP_SAR:
1967 gen_shift_rm_T1(s1, ot, d, 1, 1);
1968 break;
1969 case OP_RCL:
1970 gen_rotc_rm_T1(s1, ot, d, 0);
1971 break;
1972 case OP_RCR:
1973 gen_rotc_rm_T1(s1, ot, d, 1);
1974 break;
1975 }
1976 }
1977
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980 switch(op) {
1981 case OP_ROL:
1982 gen_rot_rm_im(s1, ot, d, c, 0);
1983 break;
1984 case OP_ROR:
1985 gen_rot_rm_im(s1, ot, d, c, 1);
1986 break;
1987 case OP_SHL:
1988 case OP_SHL1:
1989 gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990 break;
1991 case OP_SHR:
1992 gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993 break;
1994 case OP_SAR:
1995 gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996 break;
1997 default:
1998 /* currently not optimized */
1999 tcg_gen_movi_tl(s1->T1, c);
2000 gen_shift(s1, op, ot, d, OR_TMP1);
2001 break;
2002 }
2003 }
2004
2005 #define X86_MAX_INSN_LENGTH 15
2006
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009 uint64_t pc = s->pc;
2010
2011 s->pc += num_bytes;
2012 if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013 /* If the instruction's 16th byte is on a different page than the 1st, a
2014 * page fault on the second page wins over the general protection fault
2015 * caused by the instruction being too long.
2016 * This can happen even if the operand is only one byte long!
2017 */
2018 if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019 volatile uint8_t unused =
2020 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021 (void) unused;
2022 }
2023 siglongjmp(s->jmpbuf, 1);
2024 }
2025
2026 return pc;
2027 }
2028
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031 return translator_ldub(env, advance_pc(env, s, 1));
2032 }
2033
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036 return translator_ldsw(env, advance_pc(env, s, 2));
2037 }
2038
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041 return translator_lduw(env, advance_pc(env, s, 2));
2042 }
2043
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046 return translator_ldl(env, advance_pc(env, s, 4));
2047 }
2048
2049 #ifdef TARGET_X86_64
2050 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2051 {
2052 return translator_ldq(env, advance_pc(env, s, 8));
2053 }
2054 #endif
2055
2056 /* Decompose an address. */
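/*
 * In AddressParts, base and index hold register numbers, with -1 meaning
 * "absent" and base == -2 marking a RIP-relative address (64-bit mode,
 * mod == 0, rm == 5, no SIB).  scale is the shift count taken from the
 * SIB byte, disp the accumulated displacement, and def_seg the segment
 * used when no override prefix is present.
 */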
2057
2058 typedef struct AddressParts {
2059 int def_seg;
2060 int base;
2061 int index;
2062 int scale;
2063 target_long disp;
2064 } AddressParts;
2065
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067 int modrm)
2068 {
2069 int def_seg, base, index, scale, mod, rm;
2070 target_long disp;
2071 bool havesib;
2072
2073 def_seg = R_DS;
2074 index = -1;
2075 scale = 0;
2076 disp = 0;
2077
2078 mod = (modrm >> 6) & 3;
2079 rm = modrm & 7;
2080 base = rm | REX_B(s);
2081
2082 if (mod == 3) {
2083 /* Normally filtered out earlier, but including this path
2084 simplifies multi-byte nop, as well as bndcl, bndcu, bndcn. */
2085 goto done;
2086 }
2087
2088 switch (s->aflag) {
2089 case MO_64:
2090 case MO_32:
2091 havesib = 0;
2092 if (rm == 4) {
2093 int code = x86_ldub_code(env, s);
2094 scale = (code >> 6) & 3;
2095 index = ((code >> 3) & 7) | REX_X(s);
2096 if (index == 4) {
2097 index = -1; /* no index */
2098 }
2099 base = (code & 7) | REX_B(s);
2100 havesib = 1;
2101 }
2102
2103 switch (mod) {
2104 case 0:
2105 if ((base & 7) == 5) {
2106 base = -1;
2107 disp = (int32_t)x86_ldl_code(env, s);
2108 if (CODE64(s) && !havesib) {
2109 base = -2;
2110 disp += s->pc + s->rip_offset;
2111 }
2112 }
2113 break;
2114 case 1:
2115 disp = (int8_t)x86_ldub_code(env, s);
2116 break;
2117 default:
2118 case 2:
2119 disp = (int32_t)x86_ldl_code(env, s);
2120 break;
2121 }
2122
2123 /* For correct popl handling with esp. */
2124 if (base == R_ESP && s->popl_esp_hack) {
2125 disp += s->popl_esp_hack;
2126 }
2127 if (base == R_EBP || base == R_ESP) {
2128 def_seg = R_SS;
2129 }
2130 break;
2131
2132 case MO_16:
2133 if (mod == 0) {
2134 if (rm == 6) {
2135 base = -1;
2136 disp = x86_lduw_code(env, s);
2137 break;
2138 }
2139 } else if (mod == 1) {
2140 disp = (int8_t)x86_ldub_code(env, s);
2141 } else {
2142 disp = (int16_t)x86_lduw_code(env, s);
2143 }
2144
2145 switch (rm) {
2146 case 0:
2147 base = R_EBX;
2148 index = R_ESI;
2149 break;
2150 case 1:
2151 base = R_EBX;
2152 index = R_EDI;
2153 break;
2154 case 2:
2155 base = R_EBP;
2156 index = R_ESI;
2157 def_seg = R_SS;
2158 break;
2159 case 3:
2160 base = R_EBP;
2161 index = R_EDI;
2162 def_seg = R_SS;
2163 break;
2164 case 4:
2165 base = R_ESI;
2166 break;
2167 case 5:
2168 base = R_EDI;
2169 break;
2170 case 6:
2171 base = R_EBP;
2172 def_seg = R_SS;
2173 break;
2174 default:
2175 case 7:
2176 base = R_EBX;
2177 break;
2178 }
2179 break;
2180
2181 default:
2182 tcg_abort();
2183 }
2184
2185 done:
2186 return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
2188
2189 /* Compute the address, with a minimum number of TCG ops. */
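/*
 * For example, an operand like [ebx + esi*4 + 8] decomposes into
 * base = R_EBX, index = R_ESI, scale = 2, disp = 8, and is lowered to
 * roughly: shl A0, regs[R_ESI], 2; add A0, A0, regs[R_EBX]; addi A0, A0, 8.
 */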
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192 TCGv ea = NULL;
2193
2194 if (a.index >= 0) {
2195 if (a.scale == 0) {
2196 ea = cpu_regs[a.index];
2197 } else {
2198 tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199 ea = s->A0;
2200 }
2201 if (a.base >= 0) {
2202 tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203 ea = s->A0;
2204 }
2205 } else if (a.base >= 0) {
2206 ea = cpu_regs[a.base];
2207 }
2208 if (!ea) {
2209 tcg_gen_movi_tl(s->A0, a.disp);
2210 ea = s->A0;
2211 } else if (a.disp != 0) {
2212 tcg_gen_addi_tl(s->A0, ea, a.disp);
2213 ea = s->A0;
2214 }
2215
2216 return ea;
2217 }
2218
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221 AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222 TCGv ea = gen_lea_modrm_1(s, a);
2223 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228 (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230
2231 /* Used for BNDCL, BNDCU, BNDCN. */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233 TCGCond cond, TCGv_i64 bndv)
2234 {
2235 TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236
2237 tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238 if (!CODE64(s)) {
2239 tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240 }
2241 tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242 tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243 gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249 gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253 OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255 MemOp ot, int reg, int is_store)
2256 {
2257 int mod, rm;
2258
2259 mod = (modrm >> 6) & 3;
2260 rm = (modrm & 7) | REX_B(s);
2261 if (mod == 3) {
2262 if (is_store) {
2263 if (reg != OR_TMP0)
2264 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265 gen_op_mov_reg_v(s, ot, rm, s->T0);
2266 } else {
2267 gen_op_mov_v_reg(s, ot, s->T0, rm);
2268 if (reg != OR_TMP0)
2269 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270 }
2271 } else {
2272 gen_lea_modrm(env, s, modrm);
2273 if (is_store) {
2274 if (reg != OR_TMP0)
2275 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276 gen_op_st_v(s, ot, s->T0, s->A0);
2277 } else {
2278 gen_op_ld_v(s, ot, s->T0, s->A0);
2279 if (reg != OR_TMP0)
2280 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281 }
2282 }
2283 }
2284
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287 uint32_t ret;
2288
2289 switch (ot) {
2290 case MO_8:
2291 ret = x86_ldub_code(env, s);
2292 break;
2293 case MO_16:
2294 ret = x86_lduw_code(env, s);
2295 break;
2296 case MO_32:
2297 #ifdef TARGET_X86_64
2298 case MO_64:
2299 #endif
2300 ret = x86_ldl_code(env, s);
2301 break;
2302 default:
2303 tcg_abort();
2304 }
2305 return ret;
2306 }
2307
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310 if (ot <= MO_32) {
2311 return 1 << ot;
2312 } else {
2313 return 4;
2314 }
2315 }
2316
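/*
 * Direct block chaining is only safe when the jump target lies on the same
 * guest page as the TB start (or the current instruction), since a
 * cross-page target could sit in a page whose mapping has since changed.
 */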
2317 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2318 {
2319 #ifndef CONFIG_USER_ONLY
2320 return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2321 (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2322 #else
2323 return true;
2324 #endif
2325 }
2326
2327 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2328 {
2329 target_ulong pc = s->cs_base + eip;
2330
2331 if (use_goto_tb(s, pc)) {
2332 /* jump to same page: we can use a direct jump */
2333 tcg_gen_goto_tb(tb_num);
2334 gen_jmp_im(s, eip);
2335 tcg_gen_exit_tb(s->base.tb, tb_num);
2336 s->base.is_jmp = DISAS_NORETURN;
2337 } else {
2338 /* jump to another page */
2339 gen_jmp_im(s, eip);
2340 gen_jr(s, s->tmp0);
2341 }
2342 }
2343
2344 static inline void gen_jcc(DisasContext *s, int b,
2345 target_ulong val, target_ulong next_eip)
2346 {
2347 TCGLabel *l1, *l2;
2348
2349 if (s->jmp_opt) {
2350 l1 = gen_new_label();
2351 gen_jcc1(s, b, l1);
2352
2353 gen_goto_tb(s, 0, next_eip);
2354
2355 gen_set_label(l1);
2356 gen_goto_tb(s, 1, val);
2357 } else {
2358 l1 = gen_new_label();
2359 l2 = gen_new_label();
2360 gen_jcc1(s, b, l1);
2361
2362 gen_jmp_im(s, next_eip);
2363 tcg_gen_br(l2);
2364
2365 gen_set_label(l1);
2366 gen_jmp_im(s, val);
2367 gen_set_label(l2);
2368 gen_eob(s);
2369 }
2370 }
2371
2372 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2373 int modrm, int reg)
2374 {
2375 CCPrepare cc;
2376
2377 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2378
2379 cc = gen_prepare_cc(s, b, s->T1);
2380 if (cc.mask != -1) {
2381 TCGv t0 = tcg_temp_new();
2382 tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2383 cc.reg = t0;
2384 }
2385 if (!cc.use_reg2) {
2386 cc.reg2 = tcg_const_tl(cc.imm);
2387 }
2388
2389 tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2390 s->T0, cpu_regs[reg]);
2391 gen_op_mov_reg_v(s, ot, reg, s->T0);
2392
2393 if (cc.mask != -1) {
2394 tcg_temp_free(cc.reg);
2395 }
2396 if (!cc.use_reg2) {
2397 tcg_temp_free(cc.reg2);
2398 }
2399 }
2400
2401 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2402 {
2403 tcg_gen_ld32u_tl(s->T0, cpu_env,
2404 offsetof(CPUX86State,segs[seg_reg].selector));
2405 }
2406
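/* Real-mode/VM86 variant: the segment base is simply selector << 4. */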
2407 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2408 {
2409 tcg_gen_ext16u_tl(s->T0, s->T0);
2410 tcg_gen_st32_tl(s->T0, cpu_env,
2411 offsetof(CPUX86State,segs[seg_reg].selector));
2412 tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2413 }
2414
2415 /* Move T0 to seg_reg and determine whether the CPU state may change.  Never
2416    call this function with seg_reg == R_CS. */
2417 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2418 {
2419 if (PE(s) && !VM86(s)) {
2420 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2421 gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2422 /* abort translation because the addseg value may change or
2423 because ss32 may change. For R_SS, translation must always
2424 stop as a special handling must be done to disable hardware
2425 interrupts for the next instruction */
2426 if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2427 s->base.is_jmp = DISAS_TOO_MANY;
2428 }
2429 } else {
2430 gen_op_movl_seg_T0_vm(s, seg_reg);
2431 if (seg_reg == R_SS) {
2432 s->base.is_jmp = DISAS_TOO_MANY;
2433 }
2434 }
2435 }
2436
2437 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2438 {
2439 /* no SVM activated; fast case */
2440 if (likely(!GUEST(s))) {
2441 return;
2442 }
2443 gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2444 }
2445
2446 static inline void gen_stack_update(DisasContext *s, int addend)
2447 {
2448 gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2449 }
2450
2451 /* Generate a push. It depends on ss32, addseg and dflag. */
2452 static void gen_push_v(DisasContext *s, TCGv val)
2453 {
2454 MemOp d_ot = mo_pushpop(s, s->dflag);
2455 MemOp a_ot = mo_stacksize(s);
2456 int size = 1 << d_ot;
2457 TCGv new_esp = s->A0;
2458
2459 tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2460
2461 if (!CODE64(s)) {
2462 if (ADDSEG(s)) {
2463 new_esp = s->tmp4;
2464 tcg_gen_mov_tl(new_esp, s->A0);
2465 }
2466 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2467 }
2468
2469 gen_op_st_v(s, d_ot, val, s->A0);
2470 gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2471 }
2472
2473 /* A two-step pop is necessary for precise exceptions. */
2474 static MemOp gen_pop_T0(DisasContext *s)
2475 {
2476 MemOp d_ot = mo_pushpop(s, s->dflag);
2477
2478 gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2479 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2480
2481 return d_ot;
2482 }
2483
2484 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2485 {
2486 gen_stack_update(s, 1 << ot);
2487 }
2488
2489 static inline void gen_stack_A0(DisasContext *s)
2490 {
2491 gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2492 }
2493
2494 static void gen_pusha(DisasContext *s)
2495 {
2496 MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2497 MemOp d_ot = s->dflag;
2498 int size = 1 << d_ot;
2499 int i;
2500
2501 for (i = 0; i < 8; i++) {
2502 tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2503 gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2504 gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2505 }
2506
2507 gen_stack_update(s, -8 * size);
2508 }
2509
2510 static void gen_popa(DisasContext *s)
2511 {
2512 MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2513 MemOp d_ot = s->dflag;
2514 int size = 1 << d_ot;
2515 int i;
2516
2517 for (i = 0; i < 8; i++) {
2518 /* ESP is not reloaded */
2519 if (7 - i == R_ESP) {
2520 continue;
2521 }
2522 tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2523 gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2524 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2525 gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2526 }
2527
2528 gen_stack_update(s, 8 * size);
2529 }
2530
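/*
 * ENTER imm16, imm8: push EBP, copy up to level-1 saved frame pointers from
 * the old frame, push the new frame pointer itself as the last level, point
 * EBP at the new frame, then lower ESP past the locals and the copied
 * pointers.
 */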
2531 static void gen_enter(DisasContext *s, int esp_addend, int level)
2532 {
2533 MemOp d_ot = mo_pushpop(s, s->dflag);
2534 MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2535 int size = 1 << d_ot;
2536
2537 /* Push BP; compute FrameTemp into T1. */
2538 tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2539 gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2540 gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2541
2542 level &= 31;
2543 if (level != 0) {
2544 int i;
2545
2546 /* Copy level-1 pointers from the previous frame. */
2547 for (i = 1; i < level; ++i) {
2548 tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2549 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550 gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2551
2552 tcg_gen_subi_tl(s->A0, s->T1, size * i);
2553 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2554 gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2555 }
2556
2557 /* Push the current FrameTemp as the last level. */
2558 tcg_gen_subi_tl(s->A0, s->T1, size * level);
2559 gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2560 gen_op_st_v(s, d_ot, s->T1, s->A0);
2561 }
2562
2563 /* Copy the FrameTemp value to EBP. */
2564 gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2565
2566 /* Compute the final value of ESP. */
2567 tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2568 gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2569 }
2570
2571 static void gen_leave(DisasContext *s)
2572 {
2573 MemOp d_ot = mo_pushpop(s, s->dflag);
2574 MemOp a_ot = mo_stacksize(s);
2575
2576 gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2577 gen_op_ld_v(s, d_ot, s->T0, s->A0);
2578
2579 tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2580
2581 gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2582 gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2583 }
2584
2585 /* Similarly, except that the assumption here is that we don't decode
2586 the instruction at all -- either a missing opcode, an unimplemented
2587 feature, or just a bogus instruction stream. */
2588 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2589 {
2590 gen_illegal_opcode(s);
2591
2592 if (qemu_loglevel_mask(LOG_UNIMP)) {
2593 FILE *logfile = qemu_log_lock();
2594 target_ulong pc = s->pc_start, end = s->pc;
2595
2596 qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2597 for (; pc < end; ++pc) {
2598 qemu_log(" %02x", cpu_ldub_code(env, pc));
2599 }
2600 qemu_log("\n");
2601 qemu_log_unlock(logfile);
2602 }
2603 }
2604
2605 /* an interrupt is different from an exception because of the
2606 privilege checks */
2607 static void gen_interrupt(DisasContext *s, int intno,
2608 target_ulong cur_eip, target_ulong next_eip)
2609 {
2610 gen_update_cc_op(s);
2611 gen_jmp_im(s, cur_eip);
2612 gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2613 tcg_const_i32(next_eip - cur_eip));
2614 s->base.is_jmp = DISAS_NORETURN;
2615 }
2616
2617 static void gen_debug(DisasContext *s)
2618 {
2619 gen_update_cc_op(s);
2620 gen_jmp_im(s, s->base.pc_next - s->cs_base);
2621 gen_helper_debug(cpu_env);
2622 s->base.is_jmp = DISAS_NORETURN;
2623 }
2624
2625 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2626 {
2627 if ((s->flags & mask) == 0) {
2628 TCGv_i32 t = tcg_temp_new_i32();
2629 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2630 tcg_gen_ori_i32(t, t, mask);
2631 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2632 tcg_temp_free_i32(t);
2633 s->flags |= mask;
2634 }
2635 }
2636
2637 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2638 {
2639 if (s->flags & mask) {
2640 TCGv_i32 t = tcg_temp_new_i32();
2641 tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2642 tcg_gen_andi_i32(t, t, ~mask);
2643 tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2644 tcg_temp_free_i32(t);
2645 s->flags &= ~mask;
2646 }
2647 }
2648
2649 /* Clear BND registers during legacy branches. */
2650 static void gen_bnd_jmp(DisasContext *s)
2651 {
2652 /* Clear the registers only if BND prefix is missing, MPX is enabled,
2653 and if the BNDREGs are known to be in use (non-zero) already.
2654 The helper itself will check BNDPRESERVE at runtime. */
2655 if ((s->prefix & PREFIX_REPNZ) == 0
2656 && (s->flags & HF_MPX_EN_MASK) != 0
2657 && (s->flags & HF_MPX_IU_MASK) != 0) {
2658 gen_helper_bnd_jmp(cpu_env);
2659 }
2660 }
2661
2662 /* Generate an end of block. Trace exception is also generated if needed.
2663 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2664 If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2665 S->TF. This is used by the syscall/sysret insns. */
2666 static void
2667 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2668 {
2669 gen_update_cc_op(s);
2670
2671 /* If several instructions disable interrupts, only the first does it. */
2672 if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2673 gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2674 } else {
2675 gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2676 }
2677
2678 if (s->base.tb->flags & HF_RF_MASK) {
2679 gen_helper_reset_rf(cpu_env);
2680 }
2681 if (s->base.singlestep_enabled) {
2682 gen_helper_debug(cpu_env);
2683 } else if (recheck_tf) {
2684 gen_helper_rechecking_single_step(cpu_env);
2685 tcg_gen_exit_tb(NULL, 0);
2686 } else if (s->flags & HF_TF_MASK) {
2687 gen_helper_single_step(cpu_env);
2688 } else if (jr) {
2689 tcg_gen_lookup_and_goto_ptr();
2690 } else {
2691 tcg_gen_exit_tb(NULL, 0);
2692 }
2693 s->base.is_jmp = DISAS_NORETURN;
2694 }
2695
2696 static inline void
2697 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2698 {
2699 do_gen_eob_worker(s, inhibit, recheck_tf, false);
2700 }
2701
2702 /* End of block.
2703 If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */
2704 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2705 {
2706 gen_eob_worker(s, inhibit, false);
2707 }
2708
2709 /* End of block, resetting the inhibit irq flag. */
2710 static void gen_eob(DisasContext *s)
2711 {
2712 gen_eob_worker(s, false, false);
2713 }
2714
2715 /* Jump to register */
2716 static void gen_jr(DisasContext *s, TCGv dest)
2717 {
2718 do_gen_eob_worker(s, false, false, true);
2719 }
2720
2721 /* Generate a jump to eip.  No segment change must happen beforehand, as the
2722    next block may be reached by a direct (chained) jump. */
2723 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2724 {
2725 gen_update_cc_op(s);
2726 set_cc_op(s, CC_OP_DYNAMIC);
2727 if (s->jmp_opt) {
2728 gen_goto_tb(s, tb_num, eip);
2729 } else {
2730 gen_jmp_im(s, eip);
2731 gen_eob(s);
2732 }
2733 }
2734
2735 static void gen_jmp(DisasContext *s, target_ulong eip)
2736 {
2737 gen_jmp_tb(s, eip, 0);
2738 }
2739
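/*
 * Helpers moving 64-bit (Q) and 128-bit (O) values between guest memory at
 * A0 and a given offset inside CPUX86State (MMX/XMM register fields).
 */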
2740 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2741 {
2742 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2743 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2744 }
2745
2746 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2747 {
2748 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2749 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2750 }
2751
2752 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2753 {
2754 int mem_index = s->mem_index;
2755 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2756 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2757 tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2758 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2759 tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2760 }
2761
2762 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2763 {
2764 int mem_index = s->mem_index;
2765 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2766 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2767 tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2768 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2769 tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2770 }
2771
2772 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2773 {
2774 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2775 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2776 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2777 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2778 }
2779
2780 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2781 {
2782 tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2783 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2784 }
2785
2786 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2787 {
2788 tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2789 tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2790 }
2791
2792 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2793 {
2794 tcg_gen_movi_i64(s->tmp1_i64, 0);
2795 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2796 }
2797
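/*
 * The typedef suffix encodes the helper signature: the first letter is the
 * return type (0 = void, i = i32, l = i64) and the remaining letters are the
 * arguments (e = env, p = register pointer, i = i32, l = i64, t = target
 * sized value).
 */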
2798 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2799 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2800 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2801 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2802 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2803 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2804 TCGv_i32 val);
2805 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2806 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2807 TCGv val);
2808
2809 #define SSE_SPECIAL ((void *)1)
2810 #define SSE_DUMMY ((void *)2)
2811
2812 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2813 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2814 gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2815
2816 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2817 /* 3DNow! extensions */
2818 [0x0e] = { SSE_DUMMY }, /* femms */
2819 [0x0f] = { SSE_DUMMY }, /* pf... */
2820 /* pure SSE operations */
2821 [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2822 [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2823 [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2824 [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */
2825 [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2826 [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2827 [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */
2828 [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */
2829
2830 [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2831 [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */
2832 [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2833 [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2834     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2835     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2836 [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2837 [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2838 [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2839 [0x51] = SSE_FOP(sqrt),
2840 [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2841 [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2842 [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2843 [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2844 [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2845 [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2846 [0x58] = SSE_FOP(add),
2847 [0x59] = SSE_FOP(mul),
2848 [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2849 gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2850 [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2851 [0x5c] = SSE_FOP(sub),
2852 [0x5d] = SSE_FOP(min),
2853 [0x5e] = SSE_FOP(div),
2854 [0x5f] = SSE_FOP(max),
2855
2856 [0xc2] = SSE_FOP(cmpeq),
2857 [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2858 (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2859
2860 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */
2861 [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2862 [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2863
2864 /* MMX ops and their SSE extensions */
2865 [0x60] = MMX_OP2(punpcklbw),
2866 [0x61] = MMX_OP2(punpcklwd),
2867 [0x62] = MMX_OP2(punpckldq),
2868 [0x63] = MMX_OP2(packsswb),
2869 [0x64] = MMX_OP2(pcmpgtb),
2870 [0x65] = MMX_OP2(pcmpgtw),
2871 [0x66] = MMX_OP2(pcmpgtl),
2872 [0x67] = MMX_OP2(packuswb),
2873 [0x68] = MMX_OP2(punpckhbw),
2874 [0x69] = MMX_OP2(punpckhwd),
2875 [0x6a] = MMX_OP2(punpckhdq),
2876 [0x6b] = MMX_OP2(packssdw),
2877 [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2878 [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2879 [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2880     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2881 [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2882 (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2883 (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2884 (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2885 [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2886 [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2887 [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2888 [0x74] = MMX_OP2(pcmpeqb),
2889 [0x75] = MMX_OP2(pcmpeqw),
2890 [0x76] = MMX_OP2(pcmpeql),
2891 [0x77] = { SSE_DUMMY }, /* emms */
2892 [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2893 [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2894 [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2895 [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2896     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2897 [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2898 [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2899 [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2900 [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2901 [0xd1] = MMX_OP2(psrlw),
2902 [0xd2] = MMX_OP2(psrld),
2903 [0xd3] = MMX_OP2(psrlq),
2904 [0xd4] = MMX_OP2(paddq),
2905 [0xd5] = MMX_OP2(pmullw),
2906 [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2907 [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2908 [0xd8] = MMX_OP2(psubusb),
2909 [0xd9] = MMX_OP2(psubusw),
2910 [0xda] = MMX_OP2(pminub),
2911 [0xdb] = MMX_OP2(pand),
2912 [0xdc] = MMX_OP2(paddusb),
2913 [0xdd] = MMX_OP2(paddusw),
2914 [0xde] = MMX_OP2(pmaxub),
2915 [0xdf] = MMX_OP2(pandn),
2916 [0xe0] = MMX_OP2(pavgb),
2917 [0xe1] = MMX_OP2(psraw),
2918 [0xe2] = MMX_OP2(psrad),
2919 [0xe3] = MMX_OP2(pavgw),
2920 [0xe4] = MMX_OP2(pmulhuw),
2921 [0xe5] = MMX_OP2(pmulhw),
2922 [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2923     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2924 [0xe8] = MMX_OP2(psubsb),
2925 [0xe9] = MMX_OP2(psubsw),
2926 [0xea] = MMX_OP2(pminsw),
2927 [0xeb] = MMX_OP2(por),
2928 [0xec] = MMX_OP2(paddsb),
2929 [0xed] = MMX_OP2(paddsw),
2930 [0xee] = MMX_OP2(pmaxsw),
2931 [0xef] = MMX_OP2(pxor),
2932 [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2933 [0xf1] = MMX_OP2(psllw),
2934 [0xf2] = MMX_OP2(pslld),
2935 [0xf3] = MMX_OP2(psllq),
2936 [0xf4] = MMX_OP2(pmuludq),
2937 [0xf5] = MMX_OP2(pmaddwd),
2938 [0xf6] = MMX_OP2(psadbw),
2939 [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2940 (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2941 [0xf8] = MMX_OP2(psubb),
2942 [0xf9] = MMX_OP2(psubw),
2943 [0xfa] = MMX_OP2(psubl),
2944 [0xfb] = MMX_OP2(psubq),
2945 [0xfc] = MMX_OP2(paddb),
2946 [0xfd] = MMX_OP2(paddw),
2947 [0xfe] = MMX_OP2(paddl),
2948 };
2949
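/*
 * Shift-by-immediate group (opcodes 0x71/0x72/0x73): rows 0, 8 and 16 hold
 * the word, dword and qword forms, and the column within a row is the /r
 * opcode extension from the ModRM byte.
 */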
2950 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2951 [0 + 2] = MMX_OP2(psrlw),
2952 [0 + 4] = MMX_OP2(psraw),
2953 [0 + 6] = MMX_OP2(psllw),
2954 [8 + 2] = MMX_OP2(psrld),
2955 [8 + 4] = MMX_OP2(psrad),
2956 [8 + 6] = MMX_OP2(pslld),
2957 [16 + 2] = MMX_OP2(psrlq),
2958 [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2959 [16 + 6] = MMX_OP2(psllq),
2960 [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2961 };
2962
2963 static const SSEFunc_0_epi sse_op_table3ai[] = {
2964 gen_helper_cvtsi2ss,
2965 gen_helper_cvtsi2sd
2966 };
2967
2968 #ifdef TARGET_X86_64
2969 static const SSEFunc_0_epl sse_op_table3aq[] = {
2970 gen_helper_cvtsq2ss,
2971 gen_helper_cvtsq2sd
2972 };
2973 #endif
2974
2975 static const SSEFunc_i_ep sse_op_table3bi[] = {
2976 gen_helper_cvttss2si,
2977 gen_helper_cvtss2si,
2978 gen_helper_cvttsd2si,
2979 gen_helper_cvtsd2si
2980 };
2981
2982 #ifdef TARGET_X86_64
2983 static const SSEFunc_l_ep sse_op_table3bq[] = {
2984 gen_helper_cvttss2sq,
2985 gen_helper_cvtss2sq,
2986 gen_helper_cvttsd2sq,
2987 gen_helper_cvtsd2sq
2988 };
2989 #endif
2990
2991 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2992 SSE_FOP(cmpeq),
2993 SSE_FOP(cmplt),
2994 SSE_FOP(cmple),
2995 SSE_FOP(cmpunord),
2996 SSE_FOP(cmpneq),
2997 SSE_FOP(cmpnlt),
2998 SSE_FOP(cmpnle),
2999 SSE_FOP(cmpord),
3000 };
3001
3002 static const SSEFunc_0_epp sse_op_table5[256] = {
3003 [0x0c] = gen_helper_pi2fw,
3004 [0x0d] = gen_helper_pi2fd,
3005 [0x1c] = gen_helper_pf2iw,
3006 [0x1d] = gen_helper_pf2id,
3007 [0x8a] = gen_helper_pfnacc,
3008 [0x8e] = gen_helper_pfpnacc,
3009 [0x90] = gen_helper_pfcmpge,
3010 [0x94] = gen_helper_pfmin,
3011 [0x96] = gen_helper_pfrcp,
3012 [0x97] = gen_helper_pfrsqrt,
3013 [0x9a] = gen_helper_pfsub,
3014 [0x9e] = gen_helper_pfadd,
3015 [0xa0] = gen_helper_pfcmpgt,
3016 [0xa4] = gen_helper_pfmax,
3017 [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3018 [0xa7] = gen_helper_movq, /* pfrsqit1 */
3019 [0xaa] = gen_helper_pfsubr,
3020 [0xae] = gen_helper_pfacc,
3021 [0xb0] = gen_helper_pfcmpeq,
3022 [0xb4] = gen_helper_pfmul,
3023 [0xb6] = gen_helper_movq, /* pfrcpit2 */
3024 [0xb7] = gen_helper_pmulhrw_mmx,
3025 [0xbb] = gen_helper_pswapd,
3026 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3027 };
3028
3029 struct SSEOpHelper_epp {
3030 SSEFunc_0_epp op[2];
3031 uint32_t ext_mask;
3032 };
3033
3034 struct SSEOpHelper_eppi {
3035 SSEFunc_0_eppi op[2];
3036 uint32_t ext_mask;
3037 };
3038
3039 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3040 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3041 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3042 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3043 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3044 CPUID_EXT_PCLMULQDQ }
3045 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3046
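/*
 * sse_op_table6 covers the 0F 38 opcode map and sse_op_table7 the 0F 3A
 * (immediate byte) map; op[0] is the MMX form, op[1] the XMM form, and
 * ext_mask is the CPUID feature bit required for the instruction.
 */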
3047 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3048 [0x00] = SSSE3_OP(pshufb),
3049 [0x01] = SSSE3_OP(phaddw),
3050 [0x02] = SSSE3_OP(phaddd),
3051 [0x03] = SSSE3_OP(phaddsw),
3052 [0x04] = SSSE3_OP(pmaddubsw),
3053 [0x05] = SSSE3_OP(phsubw),
3054 [0x06] = SSSE3_OP(phsubd),
3055 [0x07] = SSSE3_OP(phsubsw),
3056 [0x08] = SSSE3_OP(psignb),
3057 [0x09] = SSSE3_OP(psignw),
3058 [0x0a] = SSSE3_OP(psignd),
3059 [0x0b] = SSSE3_OP(pmulhrsw),
3060 [0x10] = SSE41_OP(pblendvb),
3061 [0x14] = SSE41_OP(blendvps),
3062 [0x15] = SSE41_OP(blendvpd),
3063 [0x17] = SSE41_OP(ptest),
3064 [0x1c] = SSSE3_OP(pabsb),
3065 [0x1d] = SSSE3_OP(pabsw),
3066 [0x1e] = SSSE3_OP(pabsd),
3067 [0x20] = SSE41_OP(pmovsxbw),
3068 [0x21] = SSE41_OP(pmovsxbd),
3069 [0x22] = SSE41_OP(pmovsxbq),
3070 [0x23] = SSE41_OP(pmovsxwd),
3071 [0x24] = SSE41_OP(pmovsxwq),
3072 [0x25] = SSE41_OP(pmovsxdq),
3073 [0x28] = SSE41_OP(pmuldq),
3074 [0x29] = SSE41_OP(pcmpeqq),
3075     [0x2a] = SSE41_SPECIAL, /* movntdqa */
3076 [0x2b] = SSE41_OP(packusdw),
3077 [0x30] = SSE41_OP(pmovzxbw),
3078 [0x31] = SSE41_OP(pmovzxbd),
3079 [0x32] = SSE41_OP(pmovzxbq),
3080 [0x33] = SSE41_OP(pmovzxwd),
3081 [0x34] = SSE41_OP(pmovzxwq),
3082 [0x35] = SSE41_OP(pmovzxdq),
3083 [0x37] = SSE42_OP(pcmpgtq),
3084 [0x38] = SSE41_OP(pminsb),
3085 [0x39] = SSE41_OP(pminsd),
3086 [0x3a] = SSE41_OP(pminuw),
3087 [0x3b] = SSE41_OP(pminud),
3088 [0x3c] = SSE41_OP(pmaxsb),
3089 [0x3d] = SSE41_OP(pmaxsd),
3090 [0x3e] = SSE41_OP(pmaxuw),
3091 [0x3f] = SSE41_OP(pmaxud),
3092 [0x40] = SSE41_OP(pmulld),
3093 [0x41] = SSE41_OP(phminposuw),
3094 [0xdb] = AESNI_OP(aesimc),
3095 [0xdc] = AESNI_OP(aesenc),
3096 [0xdd] = AESNI_OP(aesenclast),
3097 [0xde] = AESNI_OP(aesdec),
3098 [0xdf] = AESNI_OP(aesdeclast),
3099 };
3100
3101 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3102 [0x08] = SSE41_OP(roundps),
3103 [0x09] = SSE41_OP(roundpd),
3104 [0x0a] = SSE41_OP(roundss),
3105 [0x0b] = SSE41_OP(roundsd),
3106 [0x0c] = SSE41_OP(blendps),
3107 [0x0d] = SSE41_OP(blendpd),
3108 [0x0e] = SSE41_OP(pblendw),
3109 [0x0f] = SSSE3_OP(palignr),
3110 [0x14] = SSE41_SPECIAL, /* pextrb */
3111 [0x15] = SSE41_SPECIAL, /* pextrw */
3112 [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3113 [0x17] = SSE41_SPECIAL, /* extractps */
3114 [0x20] = SSE41_SPECIAL, /* pinsrb */
3115 [0x21] = SSE41_SPECIAL, /* insertps */
3116 [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3117 [0x40] = SSE41_OP(dpps),
3118 [0x41] = SSE41_OP(dppd),
3119 [0x42] = SSE41_OP(mpsadbw),
3120 [0x44] = PCLMULQDQ_OP(pclmulqdq),
3121 [0x60] = SSE42_OP(pcmpestrm),
3122 [0x61] = SSE42_OP(pcmpestri),
3123 [0x62] = SSE42_OP(pcmpistrm),
3124 [0x63] = SSE42_OP(pcmpistri),
3125 [0xdf] = AESNI_OP(aeskeygenassist),
3126 };
3127
3128 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3129 target_ulong pc_start)
3130 {
3131 int b1, op1_offset, op2_offset, is_xmm, val;
3132 int modrm, mod, rm, reg;
3133 SSEFunc_0_epp sse_fn_epp;
3134 SSEFunc_0_eppi sse_fn_eppi;
3135 SSEFunc_0_ppi sse_fn_ppi;
3136 SSEFunc_0_eppt sse_fn_eppt;
3137 MemOp ot;
3138
3139 b &= 0xff;
3140 if (s->prefix & PREFIX_DATA)
3141 b1 = 1;
3142 else if (s->prefix & PREFIX_REPZ)
3143 b1 = 2;
3144 else if (s->prefix & PREFIX_REPNZ)
3145 b1 = 3;
3146 else
3147 b1 = 0;
3148 sse_fn_epp = sse_op_table1[b][b1];
3149 if (!sse_fn_epp) {
3150 goto unknown_op;
3151 }
3152 if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3153 is_xmm = 1;
3154 } else {
3155 if (b1 == 0) {
3156 /* MMX case */
3157 is_xmm = 0;
3158 } else {
3159 is_xmm = 1;
3160 }
3161 }
3162 /* simple MMX/SSE operation */
3163 if (s->flags & HF_TS_MASK) {
3164 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3165 return;
3166 }
3167 if (s->flags & HF_EM_MASK) {
3168 illegal_op:
3169 gen_illegal_opcode(s);
3170 return;
3171 }
3172 if (is_xmm
3173 && !(s->flags & HF_OSFXSR_MASK)
3174 && (b != 0x38 && b != 0x3a)) {
3175 goto unknown_op;
3176 }
3177 if (b == 0x0e) {
3178 if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3179 /* If we were fully decoding this we might use illegal_op. */
3180 goto unknown_op;
3181 }
3182 /* femms */
3183 gen_helper_emms(cpu_env);
3184 return;
3185 }
3186 if (b == 0x77) {
3187 /* emms */
3188 gen_helper_emms(cpu_env);
3189 return;
3190 }
3191 /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3192 the static cpu state) */
3193 if (!is_xmm) {
3194 gen_helper_enter_mmx(cpu_env);
3195 }
3196
3197 modrm = x86_ldub_code(env, s);
3198 reg = ((modrm >> 3) & 7);
3199 if (is_xmm) {
3200 reg |= REX_R(s);
3201 }
3202 mod = (modrm >> 6) & 3;
3203 if (sse_fn_epp == SSE_SPECIAL) {
3204 b |= (b1 << 8);
3205 switch(b) {
3206 case 0x0e7: /* movntq */
3207 if (mod == 3) {
3208 goto illegal_op;
3209 }
3210 gen_lea_modrm(env, s, modrm);
3211 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3212 break;
3213 case 0x1e7: /* movntdq */
3214 case 0x02b: /* movntps */
3215         case 0x12b: /* movntpd */
3216 if (mod == 3)
3217 goto illegal_op;
3218 gen_lea_modrm(env, s, modrm);
3219 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3220 break;
3221 case 0x3f0: /* lddqu */
3222 if (mod == 3)
3223 goto illegal_op;
3224 gen_lea_modrm(env, s, modrm);
3225 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3226 break;
3227 case 0x22b: /* movntss */
3228 case 0x32b: /* movntsd */
3229 if (mod == 3)
3230 goto illegal_op;
3231 gen_lea_modrm(env, s, modrm);
3232 if (b1 & 1) {
3233 gen_stq_env_A0(s, offsetof(CPUX86State,
3234 xmm_regs[reg].ZMM_Q(0)));
3235 } else {
3236 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3237 xmm_regs[reg].ZMM_L(0)));
3238 gen_op_st_v(s, MO_32, s->T0, s->A0);
3239 }
3240 break;
3241 case 0x6e: /* movd mm, ea */
3242 #ifdef TARGET_X86_64
3243 if (s->dflag == MO_64) {
3244 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3245 tcg_gen_st_tl(s->T0, cpu_env,
3246 offsetof(CPUX86State, fpregs[reg].mmx));
3247 } else
3248 #endif
3249 {
3250 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3251 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3252 offsetof(CPUX86State,fpregs[reg].mmx));
3253 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3254 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3255 }
3256 break;
3257 case 0x16e: /* movd xmm, ea */
3258 #ifdef TARGET_X86_64
3259 if (s->dflag == MO_64) {
3260 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3261 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3262 offsetof(CPUX86State,xmm_regs[reg]));
3263 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3264 } else
3265 #endif
3266 {
3267 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3268 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3269 offsetof(CPUX86State,xmm_regs[reg]));
3270 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3271 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3272 }
3273 break;
3274 case 0x6f: /* movq mm, ea */
3275 if (mod != 3) {
3276 gen_lea_modrm(env, s, modrm);
3277 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3278 } else {
3279 rm = (modrm & 7);
3280 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3281 offsetof(CPUX86State,fpregs[rm].mmx));
3282 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3283 offsetof(CPUX86State,fpregs[reg].mmx));
3284 }
3285 break;
3286 case 0x010: /* movups */
3287 case 0x110: /* movupd */
3288 case 0x028: /* movaps */
3289 case 0x128: /* movapd */
3290 case 0x16f: /* movdqa xmm, ea */
3291 case 0x26f: /* movdqu xmm, ea */
3292 if (mod != 3) {
3293 gen_lea_modrm(env, s, modrm);
3294 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3295 } else {
3296 rm = (modrm & 7) | REX_B(s);
3297 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3298 offsetof(CPUX86State,xmm_regs[rm]));
3299 }
3300 break;
3301 case 0x210: /* movss xmm, ea */
3302 if (mod != 3) {
3303 gen_lea_modrm(env, s, modrm);
3304 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3305 tcg_gen_st32_tl(s->T0, cpu_env,
3306 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3307 tcg_gen_movi_tl(s->T0, 0);
3308 tcg_gen_st32_tl(s->T0, cpu_env,
3309 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3310 tcg_gen_st32_tl(s->T0, cpu_env,
3311 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3312 tcg_gen_st32_tl(s->T0, cpu_env,
3313 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3314 } else {
3315 rm = (modrm & 7) | REX_B(s);
3316 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3317 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3318 }
3319 break;
3320 case 0x310: /* movsd xmm, ea */
3321 if (mod != 3) {
3322 gen_lea_modrm(env, s, modrm);
3323 gen_ldq_env_A0(s, offsetof(CPUX86State,
3324 xmm_regs[reg].ZMM_Q(0)));
3325 tcg_gen_movi_tl(s->T0, 0);
3326 tcg_gen_st32_tl(s->T0, cpu_env,
3327 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3328 tcg_gen_st32_tl(s->T0, cpu_env,
3329 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3330 } else {
3331 rm = (modrm & 7) | REX_B(s);
3332 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3333 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3334 }
3335 break;
3336 case 0x012: /* movlps */
3337 case 0x112: /* movlpd */
3338 if (mod != 3) {
3339 gen_lea_modrm(env, s, modrm);
3340 gen_ldq_env_A0(s, offsetof(CPUX86State,
3341 xmm_regs[reg].ZMM_Q(0)));
3342 } else {
3343 /* movhlps */
3344 rm = (modrm & 7) | REX_B(s);
3345 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3346 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3347 }
3348 break;
3349 case 0x212: /* movsldup */
3350 if (mod != 3) {
3351 gen_lea_modrm(env, s, modrm);
3352 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3353 } else {
3354 rm = (modrm & 7) | REX_B(s);
3355 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3356 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3357 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3358 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3359 }
3360 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3361 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3362 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3363 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3364 break;
3365 case 0x312: /* movddup */
3366 if (mod != 3) {
3367 gen_lea_modrm(env, s, modrm);
3368 gen_ldq_env_A0(s, offsetof(CPUX86State,
3369 xmm_regs[reg].ZMM_Q(0)));
3370 } else {
3371 rm = (modrm & 7) | REX_B(s);
3372 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3373 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3374 }
3375 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3376 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3377 break;
3378 case 0x016: /* movhps */
3379 case 0x116: /* movhpd */
3380 if (mod != 3) {
3381 gen_lea_modrm(env, s, modrm);
3382 gen_ldq_env_A0(s, offsetof(CPUX86State,
3383 xmm_regs[reg].ZMM_Q(1)));
3384 } else {
3385 /* movlhps */
3386 rm = (modrm & 7) | REX_B(s);
3387 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3388 offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3389 }
3390 break;
3391 case 0x216: /* movshdup */
3392 if (mod != 3) {
3393 gen_lea_modrm(env, s, modrm);
3394 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3395 } else {
3396 rm = (modrm & 7) | REX_B(s);
3397 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3398 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3399 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3400 offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3401 }
3402 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3403 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3404 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3405 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3406 break;
3407 case 0x178:
3408 case 0x378:
3409 {
3410 int bit_index, field_length;
3411
3412 if (b1 == 1 && reg != 0)
3413 goto illegal_op;
3414 field_length = x86_ldub_code(env, s) & 0x3F;
3415 bit_index = x86_ldub_code(env, s) & 0x3F;
3416 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3417 offsetof(CPUX86State,xmm_regs[reg]));
3418 if (b1 == 1)
3419 gen_helper_extrq_i(cpu_env, s->ptr0,
3420 tcg_const_i32(bit_index),
3421 tcg_const_i32(field_length));
3422 else
3423 gen_helper_insertq_i(cpu_env, s->ptr0,
3424 tcg_const_i32(bit_index),
3425 tcg_const_i32(field_length));
3426 }
3427 break;
3428 case 0x7e: /* movd ea, mm */
3429 #ifdef TARGET_X86_64
3430 if (s->dflag == MO_64) {
3431 tcg_gen_ld_i64(s->T0, cpu_env,
3432 offsetof(CPUX86State,fpregs[reg].mmx));
3433 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3434 } else
3435 #endif
3436 {
3437 tcg_gen_ld32u_tl(s->T0, cpu_env,
3438 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3439 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3440 }
3441 break;
3442 case 0x17e: /* movd ea, xmm */
3443 #ifdef TARGET_X86_64
3444 if (s->dflag == MO_64) {
3445 tcg_gen_ld_i64(s->T0, cpu_env,
3446 offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3447 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3448 } else
3449 #endif
3450 {
3451 tcg_gen_ld32u_tl