tcg-aarch64: Introduce tcg_out_insn_3405
[qemu.git] / tcg / aarch64 / tcg-target.c
1 /*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
15
16 /* We're going to re-use TCGType in setting of the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21 #ifndef NDEBUG
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28",
27 "%fp", /* frame pointer */
28 "%lr", /* link register */
29 "%sp", /* stack pointer */
30 };
31 #endif /* NDEBUG */
32
33 #ifdef TARGET_WORDS_BIGENDIAN
34 #define TCG_LDST_BSWAP 1
35 #else
36 #define TCG_LDST_BSWAP 0
37 #endif
38
39 static const int tcg_target_reg_alloc_order[] = {
40 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
41 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
42 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
43
44 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
45 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
46 TCG_REG_X16, TCG_REG_X17,
47
48 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
49
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
52
53 TCG_REG_X8, /* will not use, see tcg_target_init */
54 };
55
56 static const int tcg_target_call_iarg_regs[8] = {
57 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
58 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
59 };
60 static const int tcg_target_call_oarg_regs[1] = {
61 TCG_REG_X0
62 };
63
64 #define TCG_REG_TMP TCG_REG_X8
65
66 #ifndef CONFIG_SOFTMMU
67 # if defined(CONFIG_USE_GUEST_BASE)
68 # define TCG_REG_GUEST_BASE TCG_REG_X28
69 # else
70 # define TCG_REG_GUEST_BASE TCG_REG_XZR
71 # endif
72 #endif
73
74 static inline void reloc_pc26(void *code_ptr, intptr_t target)
75 {
76 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
77 /* read instruction, mask away previous PC_REL26 parameter contents,
78 set the proper offset, then write back the instruction. */
79 uint32_t insn = *(uint32_t *)code_ptr;
80 insn = deposit32(insn, 0, 26, offset);
81 *(uint32_t *)code_ptr = insn;
82 }
83
84 static inline void reloc_pc19(void *code_ptr, intptr_t target)
85 {
86 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
87 /* read instruction, mask away previous PC_REL19 parameter contents,
88 set the proper offset, then write back the instruction. */
89 uint32_t insn = *(uint32_t *)code_ptr;
90 insn = deposit32(insn, 5, 19, offset);
91 *(uint32_t *)code_ptr = insn;
92 }
93
94 static inline void patch_reloc(uint8_t *code_ptr, int type,
95 intptr_t value, intptr_t addend)
96 {
97 value += addend;
98
99 switch (type) {
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, value);
103 break;
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, value);
106 break;
107
108 default:
109 tcg_abort();
110 }
111 }
112
113 #define TCG_CT_CONST_IS32 0x100
114 #define TCG_CT_CONST_AIMM 0x200
115 #define TCG_CT_CONST_LIMM 0x400
116 #define TCG_CT_CONST_ZERO 0x800
117 #define TCG_CT_CONST_MONE 0x1000
118
119 /* parse target specific constraints */
120 static int target_parse_constraint(TCGArgConstraint *ct,
121 const char **pct_str)
122 {
123 const char *ct_str = *pct_str;
124
125 switch (ct_str[0]) {
126 case 'r':
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
129 break;
130 case 'l': /* qemu_ld / qemu_st address, data_reg */
131 ct->ct |= TCG_CT_REG;
132 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
133 #ifdef CONFIG_SOFTMMU
134 /* x0 and x1 will be overwritten when reading the tlb entry,
135 and x2, and x3 for helper args, better to avoid using them. */
136 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
137 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
138 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
139 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
140 #endif
141 break;
142 case 'w': /* The operand should be considered 32-bit. */
143 ct->ct |= TCG_CT_CONST_IS32;
144 break;
145 case 'A': /* Valid for arithmetic immediate (positive or negative). */
146 ct->ct |= TCG_CT_CONST_AIMM;
147 break;
148 case 'L': /* Valid for logical immediate. */
149 ct->ct |= TCG_CT_CONST_LIMM;
150 break;
151 case 'M': /* minus one */
152 ct->ct |= TCG_CT_CONST_MONE;
153 break;
154 case 'Z': /* zero */
155 ct->ct |= TCG_CT_CONST_ZERO;
156 break;
157 default:
158 return -1;
159 }
160
161 ct_str++;
162 *pct_str = ct_str;
163 return 0;
164 }
165
166 static inline bool is_aimm(uint64_t val)
167 {
168 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
169 }
170
171 static inline bool is_limm(uint64_t val)
172 {
173 /* Taking a simplified view of the logical immediates for now, ignoring
174 the replication that can happen across the field. Match bit patterns
175 of the forms
176 0....01....1
177 0..01..10..0
178 and their inverses. */
179
180 /* Make things easier below, by testing the form with msb clear. */
181 if ((int64_t)val < 0) {
182 val = ~val;
183 }
184 if (val == 0) {
185 return false;
186 }
187 val += val & -val;
188 return (val & (val - 1)) == 0;
189 }
190
191 static int tcg_target_const_match(tcg_target_long val,
192 const TCGArgConstraint *arg_ct)
193 {
194 int ct = arg_ct->ct;
195
196 if (ct & TCG_CT_CONST) {
197 return 1;
198 }
199 if (ct & TCG_CT_CONST_IS32) {
200 val = (int32_t)val;
201 }
202 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
203 return 1;
204 }
205 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
206 return 1;
207 }
208 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
209 return 1;
210 }
211 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
212 return 1;
213 }
214
215 return 0;
216 }
217
218 enum aarch64_cond_code {
219 COND_EQ = 0x0,
220 COND_NE = 0x1,
221 COND_CS = 0x2, /* Unsigned greater or equal */
222 COND_HS = COND_CS, /* ALIAS greater or equal */
223 COND_CC = 0x3, /* Unsigned less than */
224 COND_LO = COND_CC, /* ALIAS Lower */
225 COND_MI = 0x4, /* Negative */
226 COND_PL = 0x5, /* Zero or greater */
227 COND_VS = 0x6, /* Overflow */
228 COND_VC = 0x7, /* No overflow */
229 COND_HI = 0x8, /* Unsigned greater than */
230 COND_LS = 0x9, /* Unsigned less or equal */
231 COND_GE = 0xa,
232 COND_LT = 0xb,
233 COND_GT = 0xc,
234 COND_LE = 0xd,
235 COND_AL = 0xe,
236 COND_NV = 0xf, /* behaves like COND_AL here */
237 };
238
239 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
240 [TCG_COND_EQ] = COND_EQ,
241 [TCG_COND_NE] = COND_NE,
242 [TCG_COND_LT] = COND_LT,
243 [TCG_COND_GE] = COND_GE,
244 [TCG_COND_LE] = COND_LE,
245 [TCG_COND_GT] = COND_GT,
246 /* unsigned */
247 [TCG_COND_LTU] = COND_LO,
248 [TCG_COND_GTU] = COND_HI,
249 [TCG_COND_GEU] = COND_HS,
250 [TCG_COND_LEU] = COND_LS,
251 };
252
253 /* opcodes for LDR / STR instructions with base + simm9 addressing */
254 enum aarch64_ldst_op_data { /* size of the data moved */
255 LDST_8 = 0x38,
256 LDST_16 = 0x78,
257 LDST_32 = 0xb8,
258 LDST_64 = 0xf8,
259 };
260 enum aarch64_ldst_op_type { /* type of operation */
261 LDST_ST = 0x0, /* store */
262 LDST_LD = 0x4, /* load */
263 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
264 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
265 };
266
267 /* We encode the format of the insn into the beginning of the name, so that
268 we can have the preprocessor help "typecheck" the insn vs the output
269 function. Arm didn't provide us with nice names for the formats, so we
270 use the section number of the architecture reference manual in which the
271 instruction group is described. */
272 typedef enum {
273 /* Add/subtract immediate instructions. */
274 I3401_ADDI = 0x11000000,
275 I3401_ADDSI = 0x31000000,
276 I3401_SUBI = 0x51000000,
277 I3401_SUBSI = 0x71000000,
278
279 /* Bitfield instructions. */
280 I3402_BFM = 0x33000000,
281 I3402_SBFM = 0x13000000,
282 I3402_UBFM = 0x53000000,
283
284 /* Extract instruction. */
285 I3403_EXTR = 0x13800000,
286
287 /* Logical immediate instructions. */
288 I3404_ANDI = 0x12000000,
289 I3404_ORRI = 0x32000000,
290 I3404_EORI = 0x52000000,
291
292 /* Move wide immediate instructions. */
293 I3405_MOVN = 0x12800000,
294 I3405_MOVZ = 0x52800000,
295 I3405_MOVK = 0x72800000,
296
297 /* Add/subtract shifted register instructions (without a shift). */
298 I3502_ADD = 0x0b000000,
299 I3502_ADDS = 0x2b000000,
300 I3502_SUB = 0x4b000000,
301 I3502_SUBS = 0x6b000000,
302
303 /* Add/subtract shifted register instructions (with a shift). */
304 I3502S_ADD_LSL = I3502_ADD,
305
306 /* Add/subtract with carry instructions. */
307 I3503_ADC = 0x1a000000,
308 I3503_SBC = 0x5a000000,
309
310 /* Conditional select instructions. */
311 I3506_CSEL = 0x1a800000,
312 I3506_CSINC = 0x1a800400,
313
314 /* Data-processing (2 source) instructions. */
315 I3508_LSLV = 0x1ac02000,
316 I3508_LSRV = 0x1ac02400,
317 I3508_ASRV = 0x1ac02800,
318 I3508_RORV = 0x1ac02c00,
319 I3508_SMULH = 0x9b407c00,
320 I3508_UMULH = 0x9bc07c00,
321 I3508_UDIV = 0x1ac00800,
322 I3508_SDIV = 0x1ac00c00,
323
324 /* Data-processing (3 source) instructions. */
325 I3509_MADD = 0x1b000000,
326 I3509_MSUB = 0x1b008000,
327
328 /* Logical shifted register instructions (without a shift). */
329 I3510_AND = 0x0a000000,
330 I3510_BIC = 0x0a200000,
331 I3510_ORR = 0x2a000000,
332 I3510_ORN = 0x2a200000,
333 I3510_EOR = 0x4a000000,
334 I3510_EON = 0x4a200000,
335 I3510_ANDS = 0x6a000000,
336 } AArch64Insn;
337
338 static inline enum aarch64_ldst_op_data
339 aarch64_ldst_get_data(TCGOpcode tcg_op)
340 {
341 switch (tcg_op) {
342 case INDEX_op_ld8u_i32:
343 case INDEX_op_ld8s_i32:
344 case INDEX_op_ld8u_i64:
345 case INDEX_op_ld8s_i64:
346 case INDEX_op_st8_i32:
347 case INDEX_op_st8_i64:
348 return LDST_8;
349
350 case INDEX_op_ld16u_i32:
351 case INDEX_op_ld16s_i32:
352 case INDEX_op_ld16u_i64:
353 case INDEX_op_ld16s_i64:
354 case INDEX_op_st16_i32:
355 case INDEX_op_st16_i64:
356 return LDST_16;
357
358 case INDEX_op_ld_i32:
359 case INDEX_op_st_i32:
360 case INDEX_op_ld32u_i64:
361 case INDEX_op_ld32s_i64:
362 case INDEX_op_st32_i64:
363 return LDST_32;
364
365 case INDEX_op_ld_i64:
366 case INDEX_op_st_i64:
367 return LDST_64;
368
369 default:
370 tcg_abort();
371 }
372 }
373
374 static inline enum aarch64_ldst_op_type
375 aarch64_ldst_get_type(TCGOpcode tcg_op)
376 {
377 switch (tcg_op) {
378 case INDEX_op_st8_i32:
379 case INDEX_op_st16_i32:
380 case INDEX_op_st8_i64:
381 case INDEX_op_st16_i64:
382 case INDEX_op_st_i32:
383 case INDEX_op_st32_i64:
384 case INDEX_op_st_i64:
385 return LDST_ST;
386
387 case INDEX_op_ld8u_i32:
388 case INDEX_op_ld16u_i32:
389 case INDEX_op_ld8u_i64:
390 case INDEX_op_ld16u_i64:
391 case INDEX_op_ld_i32:
392 case INDEX_op_ld32u_i64:
393 case INDEX_op_ld_i64:
394 return LDST_LD;
395
396 case INDEX_op_ld8s_i32:
397 case INDEX_op_ld16s_i32:
398 return LDST_LD_S_W;
399
400 case INDEX_op_ld8s_i64:
401 case INDEX_op_ld16s_i64:
402 case INDEX_op_ld32s_i64:
403 return LDST_LD_S_X;
404
405 default:
406 tcg_abort();
407 }
408 }
409
410 static inline uint32_t tcg_in32(TCGContext *s)
411 {
412 uint32_t v = *(uint32_t *)s->code_ptr;
413 return v;
414 }
415
416 /* Emit an opcode with "type-checking" of the format. */
417 #define tcg_out_insn(S, FMT, OP, ...) \
418 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
419
420 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
421 TCGReg rd, TCGReg rn, uint64_t aimm)
422 {
423 if (aimm > 0xfff) {
424 assert((aimm & 0xfff) == 0);
425 aimm >>= 12;
426 assert(aimm <= 0xfff);
427 aimm |= 1 << 12; /* apply LSL 12 */
428 }
429 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
430 }
431
432 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
433 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
434 that feed the DecodeBitMasks pseudo function. */
435 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
436 TCGReg rd, TCGReg rn, int n, int immr, int imms)
437 {
438 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
439 | rn << 5 | rd);
440 }
441
442 #define tcg_out_insn_3404 tcg_out_insn_3402
443
444 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
445 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
446 {
447 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
448 | rn << 5 | rd);
449 }
450
451 /* This function is used for the Move (wide immediate) instruction group.
452 Note that SHIFT is a full shift count, not the 2 bit HW field. */
453 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
454 TCGReg rd, uint16_t half, unsigned shift)
455 {
456 assert((shift & ~0x30) == 0);
457 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
458 }
459
460 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
461 the rare occasion when we actually want to supply a shift amount. */
462 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
463 TCGType ext, TCGReg rd, TCGReg rn,
464 TCGReg rm, int imm6)
465 {
466 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
467 }
468
469 /* This function is for 3.5.2 (Add/subtract shifted register),
470 and 3.5.10 (Logical shifted register), for the vast majorty of cases
471 when we don't want to apply a shift. Thus it can also be used for
472 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
473 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
474 TCGReg rd, TCGReg rn, TCGReg rm)
475 {
476 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
477 }
478
479 #define tcg_out_insn_3503 tcg_out_insn_3502
480 #define tcg_out_insn_3508 tcg_out_insn_3502
481 #define tcg_out_insn_3510 tcg_out_insn_3502
482
483 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
484 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
485 {
486 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
487 | tcg_cond_to_aarch64[c] << 12);
488 }
489
490 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
491 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
492 {
493 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
494 }
495
496
497 static inline void tcg_out_ldst_9(TCGContext *s,
498 enum aarch64_ldst_op_data op_data,
499 enum aarch64_ldst_op_type op_type,
500 TCGReg rd, TCGReg rn, tcg_target_long offset)
501 {
502 /* use LDUR with BASE register with 9bit signed unscaled offset */
503 tcg_out32(s, op_data << 24 | op_type << 20
504 | (offset & 0x1ff) << 12 | rn << 5 | rd);
505 }
506
507 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
508 static inline void tcg_out_ldst_12(TCGContext *s,
509 enum aarch64_ldst_op_data op_data,
510 enum aarch64_ldst_op_type op_type,
511 TCGReg rd, TCGReg rn,
512 tcg_target_ulong scaled_uimm)
513 {
514 tcg_out32(s, (op_data | 1) << 24
515 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
516 }
517
518 /* Register to register move using ORR (shifted register with no shift). */
519 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
520 {
521 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
522 }
523
524 /* Register to register move using ADDI (move to/from SP). */
525 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
526 {
527 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
528 }
529
530 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
531 tcg_target_long value)
532 {
533 AArch64Insn insn;
534
535 if (type == TCG_TYPE_I32) {
536 value = (uint32_t)value;
537 }
538
539 /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
540 first MOVZ with the half-word immediate skipping the zeros, with a shift
541 (LSL) equal to this number. Then all next instructions use MOVKs.
542 Zero the processed half-word in the value, continue until empty.
543 We build the final result 16bits at a time with up to 4 instructions,
544 but do not emit instructions for 16bit zero holes. */
545 insn = I3405_MOVZ;
546 do {
547 unsigned shift = ctz64(value) & (63 & -16);
548 tcg_out_insn_3405(s, insn, shift >= 32, rd, value >> shift, shift);
549 value &= ~(0xffffUL << shift);
550 insn = I3405_MOVK;
551 } while (value);
552 }
553
554 static inline void tcg_out_ldst_r(TCGContext *s,
555 enum aarch64_ldst_op_data op_data,
556 enum aarch64_ldst_op_type op_type,
557 TCGReg rd, TCGReg base, TCGReg regoff)
558 {
559 /* load from memory to register using base + 64bit register offset */
560 /* using f.e. STR Wt, [Xn, Xm] 0xb8600800|(regoff << 16)|(base << 5)|rd */
561 /* the 0x6000 is for the "no extend field" */
562 tcg_out32(s, 0x00206800
563 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
564 }
565
566 /* solve the whole ldst problem */
567 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
568 enum aarch64_ldst_op_type type,
569 TCGReg rd, TCGReg rn, tcg_target_long offset)
570 {
571 if (offset >= -256 && offset < 256) {
572 tcg_out_ldst_9(s, data, type, rd, rn, offset);
573 return;
574 }
575
576 if (offset >= 256) {
577 /* if the offset is naturally aligned and in range,
578 then we can use the scaled uimm12 encoding */
579 unsigned int s_bits = data >> 6;
580 if (!(offset & ((1 << s_bits) - 1))) {
581 tcg_target_ulong scaled_uimm = offset >> s_bits;
582 if (scaled_uimm <= 0xfff) {
583 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
584 return;
585 }
586 }
587 }
588
589 /* worst-case scenario, move offset to temp register, use reg offset */
590 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
591 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
592 }
593
594 static inline void tcg_out_mov(TCGContext *s,
595 TCGType type, TCGReg ret, TCGReg arg)
596 {
597 if (ret != arg) {
598 tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
599 }
600 }
601
602 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
603 TCGReg arg1, intptr_t arg2)
604 {
605 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
606 arg, arg1, arg2);
607 }
608
609 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
610 TCGReg arg1, intptr_t arg2)
611 {
612 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
613 arg, arg1, arg2);
614 }
615
616 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
617 TCGReg rn, unsigned int a, unsigned int b)
618 {
619 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
620 }
621
622 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
623 TCGReg rn, unsigned int a, unsigned int b)
624 {
625 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
626 }
627
628 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
629 TCGReg rn, unsigned int a, unsigned int b)
630 {
631 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
632 }
633
634 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
635 TCGReg rn, TCGReg rm, unsigned int a)
636 {
637 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
638 }
639
640 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
641 TCGReg rd, TCGReg rn, unsigned int m)
642 {
643 int bits = ext ? 64 : 32;
644 int max = bits - 1;
645 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
646 }
647
648 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
649 TCGReg rd, TCGReg rn, unsigned int m)
650 {
651 int max = ext ? 63 : 31;
652 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
653 }
654
655 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
656 TCGReg rd, TCGReg rn, unsigned int m)
657 {
658 int max = ext ? 63 : 31;
659 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
660 }
661
662 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
663 TCGReg rd, TCGReg rn, unsigned int m)
664 {
665 int max = ext ? 63 : 31;
666 tcg_out_extr(s, ext, rd, rn, rn, m & max);
667 }
668
669 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
670 TCGReg rd, TCGReg rn, unsigned int m)
671 {
672 int bits = ext ? 64 : 32;
673 int max = bits - 1;
674 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
675 }
676
677 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
678 TCGReg rn, unsigned lsb, unsigned width)
679 {
680 unsigned size = ext ? 64 : 32;
681 unsigned a = (size - lsb) & (size - 1);
682 unsigned b = width - 1;
683 tcg_out_bfm(s, ext, rd, rn, a, b);
684 }
685
686 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
687 tcg_target_long b, bool const_b)
688 {
689 if (const_b) {
690 /* Using CMP or CMN aliases. */
691 if (b >= 0) {
692 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
693 } else {
694 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
695 }
696 } else {
697 /* Using CMP alias SUBS wzr, Wn, Wm */
698 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
699 }
700 }
701
702 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
703 {
704 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
705
706 if (offset < -0x02000000 || offset >= 0x02000000) {
707 /* out of 26bit range */
708 tcg_abort();
709 }
710
711 tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
712 }
713
714 static inline void tcg_out_goto_noaddr(TCGContext *s)
715 {
716 /* We pay attention here to not modify the branch target by
717 reading from the buffer. This ensure that caches and memory are
718 kept coherent during retranslation.
719 Mask away possible garbage in the high bits for the first translation,
720 while keeping the offset bits for retranslation. */
721 uint32_t insn;
722 insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
723 tcg_out32(s, insn);
724 }
725
726 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
727 {
728 /* see comments in tcg_out_goto_noaddr */
729 uint32_t insn;
730 insn = tcg_in32(s) & (0x07ffff << 5);
731 insn |= 0x54000000 | tcg_cond_to_aarch64[c];
732 tcg_out32(s, insn);
733 }
734
735 static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target)
736 {
737 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
738
739 if (offset < -0x40000 || offset >= 0x40000) {
740 /* out of 19bit range */
741 tcg_abort();
742 }
743
744 offset &= 0x7ffff;
745 tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
746 }
747
748 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
749 {
750 tcg_out32(s, 0xd63f0000 | reg << 5);
751 }
752
753 static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
754 {
755 tcg_out32(s, 0xd61f0000 | reg << 5);
756 }
757
758 static inline void tcg_out_call(TCGContext *s, intptr_t target)
759 {
760 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
761
762 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
763 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
764 tcg_out_callr(s, TCG_REG_TMP);
765 } else {
766 tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
767 }
768 }
769
770 static inline void tcg_out_ret(TCGContext *s)
771 {
772 /* emit RET { LR } */
773 tcg_out32(s, 0xd65f03c0);
774 }
775
776 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
777 {
778 intptr_t target = addr;
779 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
780
781 if (offset < -0x02000000 || offset >= 0x02000000) {
782 /* out of 26bit range */
783 tcg_abort();
784 }
785
786 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
787 flush_icache_range(jmp_addr, jmp_addr + 4);
788 }
789
790 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
791 {
792 TCGLabel *l = &s->labels[label_index];
793
794 if (!l->has_value) {
795 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
796 tcg_out_goto_noaddr(s);
797 } else {
798 tcg_out_goto(s, l->u.value);
799 }
800 }
801
802 static inline void tcg_out_goto_label_cond(TCGContext *s,
803 TCGCond c, int label_index)
804 {
805 TCGLabel *l = &s->labels[label_index];
806
807 if (!l->has_value) {
808 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
809 tcg_out_goto_cond_noaddr(s, c);
810 } else {
811 tcg_out_goto_cond(s, c, l->u.value);
812 }
813 }
814
815 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
816 TCGReg rd, TCGReg rm)
817 {
818 /* using REV 0x5ac00800 */
819 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
820 tcg_out32(s, base | rm << 5 | rd);
821 }
822
823 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
824 TCGReg rd, TCGReg rm)
825 {
826 /* using REV16 0x5ac00400 */
827 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
828 tcg_out32(s, base | rm << 5 | rd);
829 }
830
831 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
832 TCGReg rd, TCGReg rn)
833 {
834 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
835 int bits = 8 * (1 << s_bits) - 1;
836 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
837 }
838
839 static inline void tcg_out_uxt(TCGContext *s, int s_bits,
840 TCGReg rd, TCGReg rn)
841 {
842 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
843 int bits = 8 * (1 << s_bits) - 1;
844 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
845 }
846
847 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
848 TCGReg rn, int64_t aimm)
849 {
850 if (aimm >= 0) {
851 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
852 } else {
853 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
854 }
855 }
856
857 /* This function is used for the Logical (immediate) instruction group.
858 The value of LIMM must satisfy IS_LIMM. See the comment above about
859 only supporting simplified logical immediates. */
860 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
861 TCGReg rd, TCGReg rn, uint64_t limm)
862 {
863 unsigned h, l, r, c;
864
865 assert(is_limm(limm));
866
867 h = clz64(limm);
868 l = ctz64(limm);
869 if (l == 0) {
870 r = 0; /* form 0....01....1 */
871 c = ctz64(~limm) - 1;
872 if (h == 0) {
873 r = clz64(~limm); /* form 1..10..01..1 */
874 c += r;
875 }
876 } else {
877 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
878 c = r - h - 1;
879 }
880 if (ext == TCG_TYPE_I32) {
881 r &= 31;
882 c &= 31;
883 }
884
885 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
886 }
887
888 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
889 TCGReg rh, TCGReg al, TCGReg ah,
890 tcg_target_long bl, tcg_target_long bh,
891 bool const_bl, bool const_bh, bool sub)
892 {
893 TCGReg orig_rl = rl;
894 AArch64Insn insn;
895
896 if (rl == ah || (!const_bh && rl == bh)) {
897 rl = TCG_REG_TMP;
898 }
899
900 if (const_bl) {
901 insn = I3401_ADDSI;
902 if ((bl < 0) ^ sub) {
903 insn = I3401_SUBSI;
904 bl = -bl;
905 }
906 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
907 } else {
908 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
909 }
910
911 insn = I3503_ADC;
912 if (const_bh) {
913 /* Note that the only two constants we support are 0 and -1, and
914 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
915 if ((bh != 0) ^ sub) {
916 insn = I3503_SBC;
917 }
918 bh = TCG_REG_XZR;
919 } else if (sub) {
920 insn = I3503_SBC;
921 }
922 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
923
924 if (rl != orig_rl) {
925 tcg_out_movr(s, ext, orig_rl, rl);
926 }
927 }
928
929 #ifdef CONFIG_SOFTMMU
930 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
931 * int mmu_idx, uintptr_t ra)
932 */
933 static const void * const qemu_ld_helpers[4] = {
934 helper_ret_ldub_mmu,
935 helper_ret_lduw_mmu,
936 helper_ret_ldul_mmu,
937 helper_ret_ldq_mmu,
938 };
939
940 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
941 * uintxx_t val, int mmu_idx, uintptr_t ra)
942 */
943 static const void * const qemu_st_helpers[4] = {
944 helper_ret_stb_mmu,
945 helper_ret_stw_mmu,
946 helper_ret_stl_mmu,
947 helper_ret_stq_mmu,
948 };
949
950 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
951 {
952 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
953
954 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
955 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
956 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
957 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr);
958 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
959 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
960 tcg_out_callr(s, TCG_REG_TMP);
961 if (lb->opc & 0x04) {
962 tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
963 } else {
964 tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
965 }
966
967 tcg_out_goto(s, (intptr_t)lb->raddr);
968 }
969
970 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
971 {
972 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
973
974 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
975 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
976 tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
977 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
978 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
979 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
980 (intptr_t)qemu_st_helpers[lb->opc & 3]);
981 tcg_out_callr(s, TCG_REG_TMP);
982 tcg_out_goto(s, (tcg_target_long)lb->raddr);
983 }
984
985 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
986 TCGReg data_reg, TCGReg addr_reg,
987 int mem_index,
988 uint8_t *raddr, uint8_t *label_ptr)
989 {
990 TCGLabelQemuLdst *label = new_ldst_label(s);
991
992 label->is_ld = is_ld;
993 label->opc = opc;
994 label->datalo_reg = data_reg;
995 label->addrlo_reg = addr_reg;
996 label->mem_index = mem_index;
997 label->raddr = raddr;
998 label->label_ptr[0] = label_ptr;
999 }
1000
1001 /* Load and compare a TLB entry, emitting the conditional jump to the
1002 slow path for the failure case, which will be patched later when finalizing
1003 the slow path. Generated code returns the host addend in X1,
1004 clobbers X0,X2,X3,TMP. */
1005 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
1006 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
1007 {
1008 TCGReg base = TCG_AREG0;
1009 int tlb_offset = is_read ?
1010 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1011 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1012 /* Extract the TLB index from the address into X0.
1013 X0<CPU_TLB_BITS:0> =
1014 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1015 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
1016 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1017 /* Store the page mask part of the address and the low s_bits into X3.
1018 Later this allows checking for equality and alignment at the same time.
1019 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1020 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1021 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1022 /* Add any "high bits" from the tlb offset to the env address into X2,
1023 to take advantage of the LSL12 form of the ADDI instruction.
1024 X2 = env + (tlb_offset & 0xfff000) */
1025 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1026 tlb_offset & 0xfff000);
1027 /* Merge the tlb index contribution into X2.
1028 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1029 tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
1030 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1031 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1032 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1033 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
1034 LDST_LD, TCG_REG_X0, TCG_REG_X2,
1035 (tlb_offset & 0xfff));
1036 /* Load the tlb addend. Do that early to avoid stalling.
1037 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1038 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
1039 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1040 (is_read ? offsetof(CPUTLBEntry, addr_read)
1041 : offsetof(CPUTLBEntry, addr_write)));
1042 /* Perform the address comparison. */
1043 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1044 *label_ptr = s->code_ptr;
1045 /* If not equal, we jump to the slow path. */
1046 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1047 }
1048
1049 #endif /* CONFIG_SOFTMMU */
1050
1051 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
1052 TCGReg addr_r, TCGReg off_r)
1053 {
1054 switch (opc) {
1055 case 0:
1056 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
1057 break;
1058 case 0 | 4:
1059 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
1060 break;
1061 case 1:
1062 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1063 if (TCG_LDST_BSWAP) {
1064 tcg_out_rev16(s, 0, data_r, data_r);
1065 }
1066 break;
1067 case 1 | 4:
1068 if (TCG_LDST_BSWAP) {
1069 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1070 tcg_out_rev16(s, 0, data_r, data_r);
1071 tcg_out_sxt(s, 1, 1, data_r, data_r);
1072 } else {
1073 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
1074 }
1075 break;
1076 case 2:
1077 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1078 if (TCG_LDST_BSWAP) {
1079 tcg_out_rev(s, 0, data_r, data_r);
1080 }
1081 break;
1082 case 2 | 4:
1083 if (TCG_LDST_BSWAP) {
1084 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1085 tcg_out_rev(s, 0, data_r, data_r);
1086 tcg_out_sxt(s, 1, 2, data_r, data_r);
1087 } else {
1088 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
1089 }
1090 break;
1091 case 3:
1092 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
1093 if (TCG_LDST_BSWAP) {
1094 tcg_out_rev(s, 1, data_r, data_r);
1095 }
1096 break;
1097 default:
1098 tcg_abort();
1099 }
1100 }
1101
1102 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
1103 TCGReg addr_r, TCGReg off_r)
1104 {
1105 switch (opc) {
1106 case 0:
1107 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
1108 break;
1109 case 1:
1110 if (TCG_LDST_BSWAP) {
1111 tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
1112 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1113 } else {
1114 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
1115 }
1116 break;
1117 case 2:
1118 if (TCG_LDST_BSWAP) {
1119 tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
1120 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1121 } else {
1122 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
1123 }
1124 break;
1125 case 3:
1126 if (TCG_LDST_BSWAP) {
1127 tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
1128 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1129 } else {
1130 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1131 }
1132 break;
1133 default:
1134 tcg_abort();
1135 }
1136 }
1137
1138 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1139 {
1140 TCGReg addr_reg, data_reg;
1141 #ifdef CONFIG_SOFTMMU
1142 int mem_index, s_bits;
1143 uint8_t *label_ptr;
1144 #endif
1145 data_reg = args[0];
1146 addr_reg = args[1];
1147
1148 #ifdef CONFIG_SOFTMMU
1149 mem_index = args[2];
1150 s_bits = opc & 3;
1151 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1152 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1153 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1154 mem_index, s->code_ptr, label_ptr);
1155 #else /* !CONFIG_SOFTMMU */
1156 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1157 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1158 #endif /* CONFIG_SOFTMMU */
1159 }
1160
1161 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1162 {
1163 TCGReg addr_reg, data_reg;
1164 #ifdef CONFIG_SOFTMMU
1165 int mem_index, s_bits;
1166 uint8_t *label_ptr;
1167 #endif
1168 data_reg = args[0];
1169 addr_reg = args[1];
1170
1171 #ifdef CONFIG_SOFTMMU
1172 mem_index = args[2];
1173 s_bits = opc & 3;
1174
1175 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1176 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1177 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1178 mem_index, s->code_ptr, label_ptr);
1179 #else /* !CONFIG_SOFTMMU */
1180 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1181 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1182 #endif /* CONFIG_SOFTMMU */
1183 }
1184
1185 static uint8_t *tb_ret_addr;
1186
1187 /* callee stack use example:
1188 stp x29, x30, [sp,#-32]!
1189 mov x29, sp
1190 stp x1, x2, [sp,#16]
1191 ...
1192 ldp x1, x2, [sp,#16]
1193 ldp x29, x30, [sp],#32
1194 ret
1195 */
1196
1197 /* push r1 and r2, and alloc stack space for a total of
1198 alloc_n elements (1 element=16 bytes, must be between 1 and 31. */
1199 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1200 TCGReg r1, TCGReg r2, int alloc_n)
1201 {
1202 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1203 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1204 assert(alloc_n > 0 && alloc_n < 0x20);
1205 alloc_n = (-alloc_n) & 0x3f;
1206 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1207 }
1208
1209 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1210 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1211 TCGReg r1, TCGReg r2, int alloc_n)
1212 {
1213 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1214 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1215 assert(alloc_n > 0 && alloc_n < 0x20);
1216 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1217 }
1218
1219 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1220 TCGReg r1, TCGReg r2, int idx)
1221 {
1222 /* using register pair offset simm7 STP 0x29000000 | (ext)
1223 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1224 assert(idx > 0 && idx < 0x20);
1225 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1226 }
1227
1228 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1229 TCGReg r1, TCGReg r2, int idx)
1230 {
1231 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1232 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1233 assert(idx > 0 && idx < 0x20);
1234 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1235 }
1236
1237 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1238 const TCGArg args[TCG_MAX_OP_ARGS],
1239 const int const_args[TCG_MAX_OP_ARGS])
1240 {
1241 /* 99% of the time, we can signal the use of extension registers
1242 by looking to see if the opcode handles 64-bit data. */
1243 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1244
1245 /* Hoist the loads of the most common arguments. */
1246 TCGArg a0 = args[0];
1247 TCGArg a1 = args[1];
1248 TCGArg a2 = args[2];
1249 int c2 = const_args[2];
1250
1251 /* Some operands are defined with "rZ" constraint, a register or
1252 the zero register. These need not actually test args[I] == 0. */
1253 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1254
1255 switch (opc) {
1256 case INDEX_op_exit_tb:
1257 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1258 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1259 break;
1260
1261 case INDEX_op_goto_tb:
1262 #ifndef USE_DIRECT_JUMP
1263 #error "USE_DIRECT_JUMP required for aarch64"
1264 #endif
1265 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1266 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1267 /* actual branch destination will be patched by
1268 aarch64_tb_set_jmp_target later, beware retranslation. */
1269 tcg_out_goto_noaddr(s);
1270 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1271 break;
1272
1273 case INDEX_op_call:
1274 if (const_args[0]) {
1275 tcg_out_call(s, a0);
1276 } else {
1277 tcg_out_callr(s, a0);
1278 }
1279 break;
1280
1281 case INDEX_op_br:
1282 tcg_out_goto_label(s, a0);
1283 break;
1284
1285 case INDEX_op_ld_i32:
1286 case INDEX_op_ld_i64:
1287 case INDEX_op_st_i32:
1288 case INDEX_op_st_i64:
1289 case INDEX_op_ld8u_i32:
1290 case INDEX_op_ld8s_i32:
1291 case INDEX_op_ld16u_i32:
1292 case INDEX_op_ld16s_i32:
1293 case INDEX_op_ld8u_i64:
1294 case INDEX_op_ld8s_i64:
1295 case INDEX_op_ld16u_i64:
1296 case INDEX_op_ld16s_i64:
1297 case INDEX_op_ld32u_i64:
1298 case INDEX_op_ld32s_i64:
1299 case INDEX_op_st8_i32:
1300 case INDEX_op_st8_i64:
1301 case INDEX_op_st16_i32:
1302 case INDEX_op_st16_i64:
1303 case INDEX_op_st32_i64:
1304 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1305 a0, a1, a2);
1306 break;
1307
1308 case INDEX_op_add_i32:
1309 a2 = (int32_t)a2;
1310 /* FALLTHRU */
1311 case INDEX_op_add_i64:
1312 if (c2) {
1313 tcg_out_addsubi(s, ext, a0, a1, a2);
1314 } else {
1315 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1316 }
1317 break;
1318
1319 case INDEX_op_sub_i32:
1320 a2 = (int32_t)a2;
1321 /* FALLTHRU */
1322 case INDEX_op_sub_i64:
1323 if (c2) {
1324 tcg_out_addsubi(s, ext, a0, a1, -a2);
1325 } else {
1326 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1327 }
1328 break;
1329
1330 case INDEX_op_neg_i64:
1331 case INDEX_op_neg_i32:
1332 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1333 break;
1334
1335 case INDEX_op_and_i32:
1336 a2 = (int32_t)a2;
1337 /* FALLTHRU */
1338 case INDEX_op_and_i64:
1339 if (c2) {
1340 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1341 } else {
1342 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1343 }
1344 break;
1345
1346 case INDEX_op_andc_i32:
1347 a2 = (int32_t)a2;
1348 /* FALLTHRU */
1349 case INDEX_op_andc_i64:
1350 if (c2) {
1351 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1352 } else {
1353 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1354 }
1355 break;
1356
1357 case INDEX_op_or_i32:
1358 a2 = (int32_t)a2;
1359 /* FALLTHRU */
1360 case INDEX_op_or_i64:
1361 if (c2) {
1362 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1363 } else {
1364 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1365 }
1366 break;
1367
1368 case INDEX_op_orc_i32:
1369 a2 = (int32_t)a2;
1370 /* FALLTHRU */
1371 case INDEX_op_orc_i64:
1372 if (c2) {
1373 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1374 } else {
1375 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1376 }
1377 break;
1378
1379 case INDEX_op_xor_i32:
1380 a2 = (int32_t)a2;
1381 /* FALLTHRU */
1382 case INDEX_op_xor_i64:
1383 if (c2) {
1384 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1385 } else {
1386 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1387 }
1388 break;
1389
1390 case INDEX_op_eqv_i32:
1391 a2 = (int32_t)a2;
1392 /* FALLTHRU */
1393 case INDEX_op_eqv_i64:
1394 if (c2) {
1395 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1396 } else {
1397 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1398 }
1399 break;
1400
1401 case INDEX_op_not_i64:
1402 case INDEX_op_not_i32:
1403 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1404 break;
1405
1406 case INDEX_op_mul_i64:
1407 case INDEX_op_mul_i32:
1408 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1409 break;
1410
1411 case INDEX_op_div_i64:
1412 case INDEX_op_div_i32:
1413 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1414 break;
1415 case INDEX_op_divu_i64:
1416 case INDEX_op_divu_i32:
1417 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1418 break;
1419
1420 case INDEX_op_rem_i64:
1421 case INDEX_op_rem_i32:
1422 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1423 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1424 break;
1425 case INDEX_op_remu_i64:
1426 case INDEX_op_remu_i32:
1427 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1428 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1429 break;
1430
1431 case INDEX_op_shl_i64:
1432 case INDEX_op_shl_i32:
1433 if (c2) {
1434 tcg_out_shl(s, ext, a0, a1, a2);
1435 } else {
1436 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1437 }
1438 break;
1439
1440 case INDEX_op_shr_i64:
1441 case INDEX_op_shr_i32:
1442 if (c2) {
1443 tcg_out_shr(s, ext, a0, a1, a2);
1444 } else {
1445 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1446 }
1447 break;
1448
1449 case INDEX_op_sar_i64:
1450 case INDEX_op_sar_i32:
1451 if (c2) {
1452 tcg_out_sar(s, ext, a0, a1, a2);
1453 } else {
1454 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1455 }
1456 break;
1457
1458 case INDEX_op_rotr_i64:
1459 case INDEX_op_rotr_i32:
1460 if (c2) {
1461 tcg_out_rotr(s, ext, a0, a1, a2);
1462 } else {
1463 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1464 }
1465 break;
1466
1467 case INDEX_op_rotl_i64:
1468 case INDEX_op_rotl_i32:
1469 if (c2) {
1470 tcg_out_rotl(s, ext, a0, a1, a2);
1471 } else {
1472 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1473 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1474 }
1475 break;
1476
1477 case INDEX_op_brcond_i32:
1478 a1 = (int32_t)a1;
1479 /* FALLTHRU */
1480 case INDEX_op_brcond_i64:
1481 tcg_out_cmp(s, ext, a0, a1, const_args[1]);
1482 tcg_out_goto_label_cond(s, a2, args[3]);
1483 break;
1484
1485 case INDEX_op_setcond_i32:
1486 a2 = (int32_t)a2;
1487 /* FALLTHRU */
1488 case INDEX_op_setcond_i64:
1489 tcg_out_cmp(s, ext, a1, a2, c2);
1490 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1491 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1492 TCG_REG_XZR, tcg_invert_cond(args[3]));
1493 break;
1494
1495 case INDEX_op_movcond_i32:
1496 a2 = (int32_t)a2;
1497 /* FALLTHRU */
1498 case INDEX_op_movcond_i64:
1499 tcg_out_cmp(s, ext, a1, a2, c2);
1500 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1501 break;
1502
1503 case INDEX_op_qemu_ld8u:
1504 tcg_out_qemu_ld(s, args, 0 | 0);
1505 break;
1506 case INDEX_op_qemu_ld8s:
1507 tcg_out_qemu_ld(s, args, 4 | 0);
1508 break;
1509 case INDEX_op_qemu_ld16u:
1510 tcg_out_qemu_ld(s, args, 0 | 1);
1511 break;
1512 case INDEX_op_qemu_ld16s:
1513 tcg_out_qemu_ld(s, args, 4 | 1);
1514 break;
1515 case INDEX_op_qemu_ld32u:
1516 tcg_out_qemu_ld(s, args, 0 | 2);
1517 break;
1518 case INDEX_op_qemu_ld32s:
1519 tcg_out_qemu_ld(s, args, 4 | 2);
1520 break;
1521 case INDEX_op_qemu_ld32:
1522 tcg_out_qemu_ld(s, args, 0 | 2);
1523 break;
1524 case INDEX_op_qemu_ld64:
1525 tcg_out_qemu_ld(s, args, 0 | 3);
1526 break;
1527 case INDEX_op_qemu_st8:
1528 tcg_out_qemu_st(s, args, 0);
1529 break;
1530 case INDEX_op_qemu_st16:
1531 tcg_out_qemu_st(s, args, 1);
1532 break;
1533 case INDEX_op_qemu_st32:
1534 tcg_out_qemu_st(s, args, 2);
1535 break;
1536 case INDEX_op_qemu_st64:
1537 tcg_out_qemu_st(s, args, 3);
1538 break;
1539
1540 case INDEX_op_bswap32_i64:
1541 /* Despite the _i64, this is a 32-bit bswap. */
1542 ext = 0;
1543 /* FALLTHRU */
1544 case INDEX_op_bswap64_i64:
1545 case INDEX_op_bswap32_i32:
1546 tcg_out_rev(s, ext, a0, a1);
1547 break;
1548 case INDEX_op_bswap16_i64:
1549 case INDEX_op_bswap16_i32:
1550 tcg_out_rev16(s, 0, a0, a1);
1551 break;
1552
1553 case INDEX_op_ext8s_i64:
1554 case INDEX_op_ext8s_i32:
1555 tcg_out_sxt(s, ext, 0, a0, a1);
1556 break;
1557 case INDEX_op_ext16s_i64:
1558 case INDEX_op_ext16s_i32:
1559 tcg_out_sxt(s, ext, 1, a0, a1);
1560 break;
1561 case INDEX_op_ext32s_i64:
1562 tcg_out_sxt(s, 1, 2, a0, a1);
1563 break;
1564 case INDEX_op_ext8u_i64:
1565 case INDEX_op_ext8u_i32:
1566 tcg_out_uxt(s, 0, a0, a1);
1567 break;
1568 case INDEX_op_ext16u_i64:
1569 case INDEX_op_ext16u_i32:
1570 tcg_out_uxt(s, 1, a0, a1);
1571 break;
1572 case INDEX_op_ext32u_i64:
1573 tcg_out_movr(s, 0, a0, a1);
1574 break;
1575
1576 case INDEX_op_deposit_i64:
1577 case INDEX_op_deposit_i32:
1578 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1579 break;
1580
1581 case INDEX_op_add2_i32:
1582 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1583 (int32_t)args[4], args[5], const_args[4],
1584 const_args[5], false);
1585 break;
1586 case INDEX_op_add2_i64:
1587 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1588 args[5], const_args[4], const_args[5], false);
1589 break;
1590 case INDEX_op_sub2_i32:
1591 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1592 (int32_t)args[4], args[5], const_args[4],
1593 const_args[5], true);
1594 break;
1595 case INDEX_op_sub2_i64:
1596 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1597 args[5], const_args[4], const_args[5], true);
1598 break;
1599
1600 case INDEX_op_muluh_i64:
1601 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1602 break;
1603 case INDEX_op_mulsh_i64:
1604 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1605 break;
1606
1607 case INDEX_op_mov_i64:
1608 case INDEX_op_mov_i32:
1609 case INDEX_op_movi_i64:
1610 case INDEX_op_movi_i32:
1611 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1612 default:
1613 /* Opcode not implemented. */
1614 tcg_abort();
1615 }
1616
1617 #undef REG0
1618 }
1619
1620 static const TCGTargetOpDef aarch64_op_defs[] = {
1621 { INDEX_op_exit_tb, { } },
1622 { INDEX_op_goto_tb, { } },
1623 { INDEX_op_call, { "ri" } },
1624 { INDEX_op_br, { } },
1625
1626 { INDEX_op_mov_i32, { "r", "r" } },
1627 { INDEX_op_mov_i64, { "r", "r" } },
1628
1629 { INDEX_op_movi_i32, { "r" } },
1630 { INDEX_op_movi_i64, { "r" } },
1631
1632 { INDEX_op_ld8u_i32, { "r", "r" } },
1633 { INDEX_op_ld8s_i32, { "r", "r" } },
1634 { INDEX_op_ld16u_i32, { "r", "r" } },
1635 { INDEX_op_ld16s_i32, { "r", "r" } },
1636 { INDEX_op_ld_i32, { "r", "r" } },
1637 { INDEX_op_ld8u_i64, { "r", "r" } },
1638 { INDEX_op_ld8s_i64, { "r", "r" } },
1639 { INDEX_op_ld16u_i64, { "r", "r" } },
1640 { INDEX_op_ld16s_i64, { "r", "r" } },
1641 { INDEX_op_ld32u_i64, { "r", "r" } },
1642 { INDEX_op_ld32s_i64, { "r", "r" } },
1643 { INDEX_op_ld_i64, { "r", "r" } },
1644
1645 { INDEX_op_st8_i32, { "r", "r" } },
1646 { INDEX_op_st16_i32, { "r", "r" } },
1647 { INDEX_op_st_i32, { "r", "r" } },
1648 { INDEX_op_st8_i64, { "r", "r" } },
1649 { INDEX_op_st16_i64, { "r", "r" } },
1650 { INDEX_op_st32_i64, { "r", "r" } },
1651 { INDEX_op_st_i64, { "r", "r" } },
1652
1653 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1654 { INDEX_op_add_i64, { "r", "r", "rA" } },
1655 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1656 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1657 { INDEX_op_mul_i32, { "r", "r", "r" } },
1658 { INDEX_op_mul_i64, { "r", "r", "r" } },
1659 { INDEX_op_div_i32, { "r", "r", "r" } },
1660 { INDEX_op_div_i64, { "r", "r", "r" } },
1661 { INDEX_op_divu_i32, { "r", "r", "r" } },
1662 { INDEX_op_divu_i64, { "r", "r", "r" } },
1663 { INDEX_op_rem_i32, { "r", "r", "r" } },
1664 { INDEX_op_rem_i64, { "r", "r", "r" } },
1665 { INDEX_op_remu_i32, { "r", "r", "r" } },
1666 { INDEX_op_remu_i64, { "r", "r", "r" } },
1667 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1668 { INDEX_op_and_i64, { "r", "r", "rL" } },
1669 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1670 { INDEX_op_or_i64, { "r", "r", "rL" } },
1671 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1672 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1673 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1674 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1675 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1676 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1677 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1678 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1679
1680 { INDEX_op_neg_i32, { "r", "r" } },
1681 { INDEX_op_neg_i64, { "r", "r" } },
1682 { INDEX_op_not_i32, { "r", "r" } },
1683 { INDEX_op_not_i64, { "r", "r" } },
1684
1685 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1686 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1687 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1688 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1689 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1690 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1691 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1692 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1693 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1694 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1695
1696 { INDEX_op_brcond_i32, { "r", "rwA" } },
1697 { INDEX_op_brcond_i64, { "r", "rA" } },
1698 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1699 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1700 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1701 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1702
1703 { INDEX_op_qemu_ld8u, { "r", "l" } },
1704 { INDEX_op_qemu_ld8s, { "r", "l" } },
1705 { INDEX_op_qemu_ld16u, { "r", "l" } },
1706 { INDEX_op_qemu_ld16s, { "r", "l" } },
1707 { INDEX_op_qemu_ld32u, { "r", "l" } },
1708 { INDEX_op_qemu_ld32s, { "r", "l" } },
1709
1710 { INDEX_op_qemu_ld32, { "r", "l" } },
1711 { INDEX_op_qemu_ld64, { "r", "l" } },
1712
1713 { INDEX_op_qemu_st8, { "l", "l" } },
1714 { INDEX_op_qemu_st16, { "l", "l" } },
1715 { INDEX_op_qemu_st32, { "l", "l" } },
1716 { INDEX_op_qemu_st64, { "l", "l" } },
1717
1718 { INDEX_op_bswap16_i32, { "r", "r" } },
1719 { INDEX_op_bswap32_i32, { "r", "r" } },
1720 { INDEX_op_bswap16_i64, { "r", "r" } },
1721 { INDEX_op_bswap32_i64, { "r", "r" } },
1722 { INDEX_op_bswap64_i64, { "r", "r" } },
1723
1724 { INDEX_op_ext8s_i32, { "r", "r" } },
1725 { INDEX_op_ext16s_i32, { "r", "r" } },
1726 { INDEX_op_ext8u_i32, { "r", "r" } },
1727 { INDEX_op_ext16u_i32, { "r", "r" } },
1728
1729 { INDEX_op_ext8s_i64, { "r", "r" } },
1730 { INDEX_op_ext16s_i64, { "r", "r" } },
1731 { INDEX_op_ext32s_i64, { "r", "r" } },
1732 { INDEX_op_ext8u_i64, { "r", "r" } },
1733 { INDEX_op_ext16u_i64, { "r", "r" } },
1734 { INDEX_op_ext32u_i64, { "r", "r" } },
1735
1736 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1737 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1738
1739 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1740 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1741 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1742 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1743
1744 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1745 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1746
1747 { -1 },
1748 };
1749
1750 static void tcg_target_init(TCGContext *s)
1751 {
1752 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1753 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1754
1755 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1756 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1757 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1758 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1759 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1760 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1761 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1762 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1763 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1764 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1765 (1 << TCG_REG_X18));
1766
1767 tcg_regset_clear(s->reserved_regs);
1768 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1769 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1770 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1771 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1772
1773 tcg_add_target_add_op_defs(aarch64_op_defs);
1774 }
1775
1776 static void tcg_target_qemu_prologue(TCGContext *s)
1777 {
1778 /* NB: frame sizes are in 16 byte stack units! */
1779 int frame_size_callee_saved, frame_size_tcg_locals;
1780 TCGReg r;
1781
1782 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1783 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
1784
1785 /* frame size requirement for TCG local variables */
1786 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1787 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1788 + (TCG_TARGET_STACK_ALIGN - 1);
1789 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1790 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1791
1792 /* push (FP, LR) and update sp */
1793 tcg_out_push_pair(s, TCG_REG_SP,
1794 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1795
1796 /* FP -> callee_saved */
1797 tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
1798
1799 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1800 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1801 int idx = (r - TCG_REG_X19) / 2 + 1;
1802 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1803 }
1804
1805 /* Make stack space for TCG locals. */
1806 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1807 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1808
1809 /* inform TCG about how to find TCG locals with register, offset, size */
1810 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1811 CPU_TEMP_BUF_NLONGS * sizeof(long));
1812
1813 #if defined(CONFIG_USE_GUEST_BASE)
1814 if (GUEST_BASE) {
1815 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1816 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1817 }
1818 #endif
1819
1820 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1821 tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
1822
1823 tb_ret_addr = s->code_ptr;
1824
1825 /* Remove TCG locals stack space. */
1826 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1827 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1828
1829 /* restore registers x19..x28.
1830 FP must be preserved, so it still points to callee_saved area */
1831 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1832 int idx = (r - TCG_REG_X19) / 2 + 1;
1833 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1834 }
1835
1836 /* pop (FP, LR), restore SP to previous frame, return */
1837 tcg_out_pop_pair(s, TCG_REG_SP,
1838 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1839 tcg_out_ret(s);
1840 }