tcg/aarch64: add ext argument to tcg_out_insn_3310
/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-be-ldst.h"
#include "qemu/bitops.h"

/* We're going to re-use TCGType when setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
    "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
    "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
    "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
};
#endif /* NDEBUG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30

#ifndef CONFIG_SOFTMMU
# ifdef CONFIG_USE_GUEST_BASE
# define TCG_REG_GUEST_BASE TCG_REG_X28
# else
# define TCG_REG_GUEST_BASE TCG_REG_XZR
# endif
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    assert(offset == sextract64(offset, 0, 26));
    /* read instruction, mask away previous PC_REL26 parameter contents,
       set the proper offset, then write back the instruction. */
    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    assert(offset == sextract64(offset, 0, 19));
    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
}
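
/* Note that the offsets above are counted in 32-bit instruction units
   (tcg_insn_unit), as the immediates of B/BL and B.cond are.  A signed
   26-bit instruction offset thus reaches +/-128MB of code and a 19-bit
   one +/-1MB; the asserts fire if a branch target ever falls outside
   that range.  */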

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
        break;
    case R_AARCH64_CONDBR19:
        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
        break;
    default:
        tcg_abort();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct,
                                   const char **pct_str)
{
    const char *ct_str = *pct_str;

    switch (ct_str[0]) {
    case 'r':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are used for the helper arguments; better to
           avoid using them.  */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return -1;
    }

    ct_str++;
    *pct_str = ct_str;
    return 0;
}

static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
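
/* For instance, 0xfff and 0xfff000 are valid arithmetic immediates
   (the latter via LSL #12), while 0x1001 is not, since its set bits
   span both halves.  */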

static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
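
/* Worked example of the test above: val = 0x0ff0 has the form
   0..01..10..0; val & -val = 0x10, so val += val & -val yields 0x1000,
   a power of two, and the final test passes.  By contrast val = 0x0f0f
   becomes 0x0f10, which still has multiple bits set, so it is
   correctly rejected.  */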

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,      /* store */
    LDST_LD = 1,      /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
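
/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing a format
   number with an opcode name fails to compile unless both the I3401_ADDI
   constant and the _3401 emitter exist.  */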

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
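
/* The (ofs & (0x7f << 3)) << (15 - 3) above extracts the 7-bit signed
   quantity ofs/8 and places it in the imm7 field at bits [21:15];
   e.g. ofs = -16 yields imm7 = 0x7e, i.e. -2 entries of 8 bytes.  */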

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register)
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | rd);
}
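
/* In the register-offset form, bits [15:13] select the extend option
   applied to the offset register, and bit 12 is the shift flag (left
   zero here).  The 0x4000 above gives option UXTW (0b010) when EXT is
   TCG_TYPE_I32, zero-extending a 32-bit offset register; with EXT ==
   TCG_TYPE_I64, the ext << 13 term turns that into LSL/UXTX (0b011),
   a plain 64-bit offset.  */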

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
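
/* Worked example: limm = 0x0ff0 gives h = 52 and l = 4, hence r = 60
   and c = 7, i.e. an 8-bit run of ones rotated right by 60 -- exactly
   what DecodeBitMasks(N=1, immr=60, imms=7) reconstructs for a 64-bit
   operand.  Note the call above passes (ext, r, c) as (n, immr, imms).  */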

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    AArch64Insn insn;
    int i, wantinv, shift;
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long imask;

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  For the value and its
       inverse, count the number of 16-bit lanes that are 0.  */
    for (i = wantinv = imask = 0; i < 64; i += 16) {
        tcg_target_long mask = 0xffffull << i;
        if ((value & mask) == 0) {
            wantinv -= 1;
        }
        if ((ivalue & mask) == 0) {
            wantinv += 1;
            imask |= mask;
        }
    }

    /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN.  */
    insn = I3405_MOVZ;
    if (wantinv > 0) {
        value = ivalue;
        insn = I3405_MOVN;
    }

    /* Find the lowest lane that is not 0x0000.  */
    shift = ctz64(value) & (63 & -16);
    tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);

    if (wantinv > 0) {
        /* Re-invert the value, so MOVK sees non-inverted bits.  */
        value = ~value;
        /* Clear out all the 0xffff lanes.  */
        value ^= imask;
    }
    /* Clear out the lane that we just set.  */
    value &= ~(0xffffUL << shift);

    /* Iterate until all lanes have been set, and thus cleared from VALUE.  */
    while (value) {
        shift = ctz64(value) & (63 & -16);
        tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
        value &= ~(0xffffUL << shift);
    }
}
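
/* As an illustration of the lane logic above (and assuming the ADRP
   shortcut does not happen to apply), value = 0x123456789abc has one
   zero 16-bit lane while its inverse has none, so MOVZ wins and we emit
       movz rd, #0x9abc
       movk rd, #0x5678, lsl #16
       movk rd, #0x1234, lsl #32
   whereas a mostly-ones constant such as 0xffffffffffff0123 is caught
   by the early-out above and becomes a single movn rd, #0xfedc.  */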

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
                         TCGReg rd, TCGReg rn, intptr_t offset)
{
    TCGMemOp size = (uint32_t)insn >> 30;

    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << size) - 1))) {
        uintptr_t scaled_uimm = offset >> size;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
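
/* Examples of the selection above for an LDRX (size = 3): offset 0x7ff8
   uses the scaled uimm12 form (0x7ff8 / 8 = 0xfff); offset -8 uses the
   unscaled signed 9-bit form; offset 0x10000 fits neither encoding, so
   it is materialized in TMP and the register-offset form is used.  */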

static inline void tcg_out_mov(TCGContext *s,
                               TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_movr(s, type, ret, arg);
    }
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
                 arg, arg1, arg2);
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
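
/* E.g. a 32-bit shift left by 8 becomes UBFM Wd, Wn, #24, #23,
   which is the LSL Wd, Wn, #8 alias.  */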

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
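
/* E.g. a 32-bit deposit at lsb = 8, width = 8 becomes
   BFM Wd, Wn, #24, #7, i.e. the BFI Wd, Wn, #8, #8 alias.  */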

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
    /* Take care not to modify the branch target by reading from the
       buffer.  This ensures that caches and memory are kept coherent
       during retranslation.  Mask away possible garbage in the high
       bits for the first translation, while keeping the offset bits
       for retranslation.  */
    uint32_t old = tcg_in32(s);
    tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
    /* See comments in tcg_out_goto_noaddr.  */
    uint32_t old = tcg_in32(s) >> 5;
    tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
    tcg_insn_unit *target = (tcg_insn_unit *)addr;

    reloc_pc26(code_ptr, target);
    flush_icache_range(jmp_addr, jmp_addr + 4);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_goto_noaddr(s);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
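
/* The constant-high-part trick above works because SBC computes
   rn + ~rm + C: with rm = XZR that is rn - 1 + C, so "adc rh, ah, #-1"
   and "sbc rh, ah, xzr" are the same operation, letting both supported
   constants (0 and -1) be encoded against the zero register.  */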

#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     TCGMemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, TCGMemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}

static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    if (opc & MO_SIGN) {
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
}

static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGMemOpIdx oi = lb->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp size = opc & MO_SIZE;

    reloc_pc19(lb->label_ptr[0], s->code_ptr);

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
    tcg_out_goto(s, lb->raddr);
}

static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = ext;
    label->datalo_reg = data_reg;
    label->addrlo_reg = addr_reg;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path.  Generated code returns the host addend in X1,
   clobbers X0, X2, X3, TMP.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    TCGReg base = TCG_AREG0;
    int tlb_offset = is_read ?
        offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
        : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);

    /* Extract the TLB index from the address into X0.
       X0<CPU_TLB_BITS:0> =
       addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
    tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);

    /* Store the page mask part of the address and the low s_bits into X3.
       Later this allows checking for equality and alignment at the same time.
       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
                     addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));

    /* Add any "high bits" from the tlb offset to the env address into X2,
       to take advantage of the LSL12 form of the ADDI instruction.
       X2 = env + (tlb_offset & 0xfff000) */
    if (tlb_offset & 0xfff000) {
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
                     tlb_offset & 0xfff000);
        base = TCG_REG_X2;
    }

    /* Merge the tlb index contribution into X2.
       X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
    tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
                 TCG_REG_X0, CPU_TLB_ENTRY_BITS);

    /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
       X0 = load [X2 + (tlb_offset & 0x000fff)] */
    tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
                 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);

    /* Load the tlb addend.  Do that early to avoid stalling.
       X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
    tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
                 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
                 (is_read ? offsetof(CPUTLBEntry, addr_read)
                          : offsetof(CPUTLBEntry, addr_write)));

    /* Perform the address comparison.  */
    tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path.  */
    *label_ptr = s->code_ptr;
    tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
}
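
/* For a 64-bit guest, the sequence emitted above is, schematically:
       ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
       and  x3, addr, #(TARGET_PAGE_MASK | s_mask)
       add  x2, env, #(tlb_offset & 0xfff000)       ; only if needed
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]         ; comparator
       ldr  x1, [x2, #(... + offsetof(addend))]     ; host addend
       cmp  x0, x3
       b.ne slow_path                               ; patched later
   leaving the host addend in x1 for the fast-path access.  */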

#endif /* CONFIG_SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;
    const TCGType otype = TCG_TYPE_I64;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev16(s, data_r, data_r);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
            tcg_out_rev16(s, data_r, data_r);
            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                           data_r, addr_r, otype, off_r);
        }
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev32(s, data_r, data_r);
        }
        break;
    case MO_SL:
        if (bswap) {
            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
            tcg_out_rev32(s, data_r, data_r);
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
        } else {
            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        }
        break;
    case MO_Q:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        if (bswap) {
            tcg_out_rev64(s, data_r, data_r);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
                                   TCGReg data_r, TCGReg addr_r, TCGReg off_r)
{
    const TCGMemOp bswap = memop & MO_BSWAP;
    const TCGType otype = TCG_TYPE_I64;

    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev16(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev32(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        if (bswap && data_r != TCG_REG_XZR) {
            tcg_out_rev64(s, TCG_REG_TMP, data_r);
            data_r = TCG_REG_TMP;
        }
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi, TCGType ext)
{
    TCGMemOp memop = get_memop(oi);
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    TCGMemOp s_bits = memop & MO_SIZE;
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            TCGMemOpIdx oi)
{
    TCGMemOp memop = get_memop(oi);
#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    TCGMemOp s_bits = memop & MO_SIZE;
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
    add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg,
                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
#endif /* CONFIG_SOFTMMU */
}

static tcg_insn_unit *tb_ret_addr;

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
        tcg_out_goto(s, tb_ret_addr);
        break;

    case INDEX_op_goto_tb:
#ifndef USE_DIRECT_JUMP
#error "USE_DIRECT_JUMP required for aarch64"
#endif
        assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
        s->tb_jmp_offset[a0] = tcg_current_code_size(s);
        /* actual branch destination will be patched by
           aarch64_tb_set_jmp_target later, beware retranslation.  */
        tcg_out_goto_noaddr(s);
        s->tb_next_offset[a0] = tcg_current_code_size(s);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev64(s, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap32_i32:
        tcg_out_rev32(s, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev16(s, a0, a1);
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef REG0
}

static const TCGTargetOpDef aarch64_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_br, { } },

    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },

    { INDEX_op_st8_i32, { "rZ", "r" } },
    { INDEX_op_st16_i32, { "rZ", "r" } },
    { INDEX_op_st_i32, { "rZ", "r" } },
    { INDEX_op_st8_i64, { "rZ", "r" } },
    { INDEX_op_st16_i64, { "rZ", "r" } },
    { INDEX_op_st32_i64, { "rZ", "r" } },
    { INDEX_op_st_i64, { "rZ", "r" } },

    { INDEX_op_add_i32, { "r", "r", "rA" } },
    { INDEX_op_add_i64, { "r", "r", "rA" } },
    { INDEX_op_sub_i32, { "r", "r", "rA" } },
    { INDEX_op_sub_i64, { "r", "r", "rA" } },
    { INDEX_op_mul_i32, { "r", "r", "r" } },
    { INDEX_op_mul_i64, { "r", "r", "r" } },
    { INDEX_op_div_i32, { "r", "r", "r" } },
    { INDEX_op_div_i64, { "r", "r", "r" } },
    { INDEX_op_divu_i32, { "r", "r", "r" } },
    { INDEX_op_divu_i64, { "r", "r", "r" } },
    { INDEX_op_rem_i32, { "r", "r", "r" } },
    { INDEX_op_rem_i64, { "r", "r", "r" } },
    { INDEX_op_remu_i32, { "r", "r", "r" } },
    { INDEX_op_remu_i64, { "r", "r", "r" } },
    { INDEX_op_and_i32, { "r", "r", "rL" } },
    { INDEX_op_and_i64, { "r", "r", "rL" } },
    { INDEX_op_or_i32, { "r", "r", "rL" } },
    { INDEX_op_or_i64, { "r", "r", "rL" } },
    { INDEX_op_xor_i32, { "r", "r", "rL" } },
    { INDEX_op_xor_i64, { "r", "r", "rL" } },
    { INDEX_op_andc_i32, { "r", "r", "rL" } },
    { INDEX_op_andc_i64, { "r", "r", "rL" } },
    { INDEX_op_orc_i32, { "r", "r", "rL" } },
    { INDEX_op_orc_i64, { "r", "r", "rL" } },
    { INDEX_op_eqv_i32, { "r", "r", "rL" } },
    { INDEX_op_eqv_i64, { "r", "r", "rL" } },

    { INDEX_op_neg_i32, { "r", "r" } },
    { INDEX_op_neg_i64, { "r", "r" } },
    { INDEX_op_not_i32, { "r", "r" } },
    { INDEX_op_not_i64, { "r", "r" } },

    { INDEX_op_shl_i32, { "r", "r", "ri" } },
    { INDEX_op_shr_i32, { "r", "r", "ri" } },
    { INDEX_op_sar_i32, { "r", "r", "ri" } },
    { INDEX_op_rotl_i32, { "r", "r", "ri" } },
    { INDEX_op_rotr_i32, { "r", "r", "ri" } },
    { INDEX_op_shl_i64, { "r", "r", "ri" } },
    { INDEX_op_shr_i64, { "r", "r", "ri" } },
    { INDEX_op_sar_i64, { "r", "r", "ri" } },
    { INDEX_op_rotl_i64, { "r", "r", "ri" } },
    { INDEX_op_rotr_i64, { "r", "r", "ri" } },

    { INDEX_op_brcond_i32, { "r", "rA" } },
    { INDEX_op_brcond_i64, { "r", "rA" } },
    { INDEX_op_setcond_i32, { "r", "r", "rA" } },
    { INDEX_op_setcond_i64, { "r", "r", "rA" } },
    { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
    { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },

    { INDEX_op_qemu_ld_i32, { "r", "l" } },
    { INDEX_op_qemu_ld_i64, { "r", "l" } },
    { INDEX_op_qemu_st_i32, { "lZ", "l" } },
    { INDEX_op_qemu_st_i64, { "lZ", "l" } },

    { INDEX_op_bswap16_i32, { "r", "r" } },
    { INDEX_op_bswap32_i32, { "r", "r" } },
    { INDEX_op_bswap16_i64, { "r", "r" } },
    { INDEX_op_bswap32_i64, { "r", "r" } },
    { INDEX_op_bswap64_i64, { "r", "r" } },

    { INDEX_op_ext8s_i32, { "r", "r" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "r" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
    { INDEX_op_deposit_i64, { "r", "0", "rZ" } },

    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
    { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },

    { INDEX_op_muluh_i64, { "r", "r", "r" } },
    { INDEX_op_mulsh_i64, { "r", "r", "r" } },

    { -1 },
};

static void tcg_target_init(TCGContext *s)
{
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);

    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                     (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
                     (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
                     (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
                     (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
                     (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
                     (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
                     (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
                     (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
                     (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
                     (1 << TCG_REG_X18) | (1 << TCG_REG_X30));

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */

    tcg_add_target_add_op_defs(aarch64_op_defs);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if defined(CONFIG_USE_GUEST_BASE)
    if (GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
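
/* Resulting frame layout, with locals = FRAME_SIZE - PUSH_SIZE and
   the CFA being the entry SP, i.e. sp + FRAME_SIZE:
       [sp + locals + 88]  x28    (CFA - 8)
       [sp + locals + 80]  x27
       ...                 (x19,x20) .. (x25,x26)
       [sp + locals + 8]   lr     (CFA - 88)
       [sp + locals + 0]   fp     (CFA - 96)   <- fp points here
       [sp, sp + locals)   static call args + TCG temps
   This matches the DW_CFA_offset entries in debug_frame below.  */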

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28, -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset, lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset, fp, -96 */
    }
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}