target/arm: Handle TBI for sve scalar + int memory ops
[qemu.git] / target / arm / translate-a64.c
1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "translate.h"
28 #include "internals.h"
29 #include "qemu/host-utils.h"
30
31 #include "hw/semihosting/semihost.h"
32 #include "exec/gen-icount.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36 #include "exec/log.h"
37
38 #include "trace-tcg.h"
39 #include "translate-a64.h"
40 #include "qemu/atomic128.h"
41
42 static TCGv_i64 cpu_X[32];
43 static TCGv_i64 cpu_pc;
44
45 /* Load/store exclusive handling */
46 static TCGv_i64 cpu_exclusive_high;
47
48 static const char *regnames[] = {
49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
52 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
53 };
54
55 enum a64_shift_type {
56 A64_SHIFT_TYPE_LSL = 0,
57 A64_SHIFT_TYPE_LSR = 1,
58 A64_SHIFT_TYPE_ASR = 2,
59 A64_SHIFT_TYPE_ROR = 3
60 };
61
62 /* Table based decoder typedefs - used when the relevant bits for decode
63 * are too awkwardly scattered across the instruction (eg SIMD).
64 */
65 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
66
67 typedef struct AArch64DecodeTable {
68 uint32_t pattern;
69 uint32_t mask;
70 AArch64DecodeFn *disas_fn;
71 } AArch64DecodeTable;
72
73 /* initialize TCG globals. */
74 void a64_translate_init(void)
75 {
76 int i;
77
78 cpu_pc = tcg_global_mem_new_i64(cpu_env,
79 offsetof(CPUARMState, pc),
80 "pc");
81 for (i = 0; i < 32; i++) {
82 cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
83 offsetof(CPUARMState, xregs[i]),
84 regnames[i]);
85 }
86
87 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
88 offsetof(CPUARMState, exclusive_high), "exclusive_high");
89 }
90
91 /*
92 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
93 */
94 static int get_a64_user_mem_index(DisasContext *s)
95 {
96 /*
97 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
98 * which is the usual mmu_idx for this cpu state.
99 */
100 ARMMMUIdx useridx = s->mmu_idx;
101
102 if (s->unpriv) {
103 /*
104 * We have pre-computed the condition for AccType_UNPRIV.
105 * Therefore we should never get here with a mmu_idx for
106 * which we do not know the corresponding user mmu_idx.
107 */
108 switch (useridx) {
109 case ARMMMUIdx_E10_1:
110 case ARMMMUIdx_E10_1_PAN:
111 useridx = ARMMMUIdx_E10_0;
112 break;
113 case ARMMMUIdx_E20_2:
114 case ARMMMUIdx_E20_2_PAN:
115 useridx = ARMMMUIdx_E20_0;
116 break;
117 case ARMMMUIdx_SE10_1:
118 case ARMMMUIdx_SE10_1_PAN:
119 useridx = ARMMMUIdx_SE10_0;
120 break;
121 default:
122 g_assert_not_reached();
123 }
124 }
125 return arm_to_core_mmu_idx(useridx);
126 }
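/*
 * Worked example (illustrative, not part of the original source): for an
 * unprivileged-access insn such as LDTR executed at EL1 in the E10 regime,
 * s->unpriv is true and s->mmu_idx is ARMMMUIdx_E10_1 (or its PAN variant),
 * so the switch above selects ARMMMUIdx_E10_0 and the function returns the
 * corresponding core mmu index.
 */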
127
128 static void reset_btype(DisasContext *s)
129 {
130 if (s->btype != 0) {
131 TCGv_i32 zero = tcg_const_i32(0);
132 tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
133 tcg_temp_free_i32(zero);
134 s->btype = 0;
135 }
136 }
137
138 static void set_btype(DisasContext *s, int val)
139 {
140 TCGv_i32 tcg_val;
141
142 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
143 tcg_debug_assert(val >= 1 && val <= 3);
144
145 tcg_val = tcg_const_i32(val);
146 tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
147 tcg_temp_free_i32(tcg_val);
148 s->btype = -1;
149 }
150
151 void gen_a64_set_pc_im(uint64_t val)
152 {
153 tcg_gen_movi_i64(cpu_pc, val);
154 }
155
156 /*
157 * Handle Top Byte Ignore (TBI) bits.
158 *
159 * If address tagging is enabled via the TCR TBI bits:
160 * + for EL2 and EL3 there is only one TBI bit, and if it is set
161 * then the address is zero-extended, clearing bits [63:56]
162 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 163  * and TBI1 controls addresses with bit 55 == 1.
164 * If the appropriate TBI bit is set for the address then
165 * the address is sign-extended from bit 55 into bits [63:56]
166 *
 167  * Here we have concatenated TBI{1,0} into tbi.
168 */
169 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
170 TCGv_i64 src, int tbi)
171 {
172 if (tbi == 0) {
173 /* Load unmodified address */
174 tcg_gen_mov_i64(dst, src);
175 } else if (!regime_has_2_ranges(s->mmu_idx)) {
176 /* Force tag byte to all zero */
177 tcg_gen_extract_i64(dst, src, 0, 56);
178 } else {
179 /* Sign-extend from bit 55. */
180 tcg_gen_sextract_i64(dst, src, 0, 56);
181
182 if (tbi != 3) {
183 TCGv_i64 tcg_zero = tcg_const_i64(0);
184
185 /*
186 * The two TBI bits differ.
187 * If tbi0, then !tbi1: only use the extension if positive.
188 * if !tbi0, then tbi1: only use the extension if negative.
189 */
190 tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
191 dst, dst, tcg_zero, dst, src);
192 tcg_temp_free_i64(tcg_zero);
193 }
194 }
195 }
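/*
 * Worked example (illustrative, not part of the original source): with
 * tbi == 1 (TBI0 set, TBI1 clear) and src == 0x7f00_0000_dead_beef, bit 55
 * is clear, so the sextract leaves dst == 0x0000_0000_dead_beef and the
 * movcond keeps that value because the extension is non-negative -- the tag
 * byte is ignored.  With src == 0xa5ff_ffff_dead_beef, bit 55 is set, the
 * extension is negative, and the movcond falls back to the unmodified src,
 * preserving the 0xa5 tag byte as required when only TBI0 is enabled.
 */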
196
197 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
198 {
199 /*
200 * If address tagging is enabled for instructions via the TCR TBI bits,
201 * then loading an address into the PC will clear out any tag.
202 */
203 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
204 }
205
206 /*
207 * Handle MTE and/or TBI.
208 *
209 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
210 * for the tag to be present in the FAR_ELx register. But for user-only
211 * mode we do not have a TLB with which to implement this, so we must
212 * remove the top byte now.
213 *
214 * Always return a fresh temporary that we can increment independently
215 * of the write-back address.
216 */
217
218 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
219 {
220 TCGv_i64 clean = new_tmp_a64(s);
221 #ifdef CONFIG_USER_ONLY
222 gen_top_byte_ignore(s, clean, addr, s->tbid);
223 #else
224 tcg_gen_mov_i64(clean, addr);
225 #endif
226 return clean;
227 }
228
229 /* Insert a zero tag into src, with the result at dst. */
230 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
231 {
232 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
233 }
234
235 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
236 MMUAccessType acc, int log2_size)
237 {
238 TCGv_i32 t_acc = tcg_const_i32(acc);
239 TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s));
240 TCGv_i32 t_size = tcg_const_i32(1 << log2_size);
241
242 gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size);
243 tcg_temp_free_i32(t_acc);
244 tcg_temp_free_i32(t_idx);
245 tcg_temp_free_i32(t_size);
246 }
247
248 /*
249 * For MTE, check a single logical or atomic access. This probes a single
250 * address, the exact one specified. The size and alignment of the access
251 * is not relevant to MTE, per se, but watchpoints do require the size,
252 * and we want to recognize those before making any other changes to state.
253 */
254 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
255 bool is_write, bool tag_checked,
256 int log2_size, bool is_unpriv,
257 int core_idx)
258 {
259 if (tag_checked && s->mte_active[is_unpriv]) {
260 TCGv_i32 tcg_desc;
261 TCGv_i64 ret;
262 int desc = 0;
263
264 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
265 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
266 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
267 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
268 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size);
269 tcg_desc = tcg_const_i32(desc);
270
271 ret = new_tmp_a64(s);
272 gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr);
273 tcg_temp_free_i32(tcg_desc);
274
275 return ret;
276 }
277 return clean_data_tbi(s, addr);
278 }
279
280 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
281 bool tag_checked, int log2_size)
282 {
283 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
284 false, get_mem_index(s));
285 }
286
287 /*
288 * For MTE, check multiple logical sequential accesses.
289 */
290 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
291 bool tag_checked, int log2_esize, int total_size)
292 {
293 if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
294 TCGv_i32 tcg_desc;
295 TCGv_i64 ret;
296 int desc = 0;
297
298 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
299 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
300 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
301 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
302 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize);
303 desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size);
304 tcg_desc = tcg_const_i32(desc);
305
306 ret = new_tmp_a64(s);
307 gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr);
308 tcg_temp_free_i32(tcg_desc);
309
310 return ret;
311 }
312 return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize);
313 }
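/*
 * Note (illustrative, not from the original source): the "desc" word built
 * above is a plain uint32_t in which FIELD_DP32() deposits each MTEDESC
 * field -- mmu index, TBI/TCMA state, write flag and element/total sizes --
 * into the bit range declared by the MTEDESC FIELD() definitions elsewhere
 * in target/arm.  The helper receives it as an ordinary 32-bit constant and
 * is expected to unpack the same fields with the matching FIELD_EX32()
 * accessors.
 */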
314
315 typedef struct DisasCompare64 {
316 TCGCond cond;
317 TCGv_i64 value;
318 } DisasCompare64;
319
320 static void a64_test_cc(DisasCompare64 *c64, int cc)
321 {
322 DisasCompare c32;
323
324 arm_test_cc(&c32, cc);
325
326 /* Sign-extend the 32-bit value so that the GE/LT comparisons work
327 * properly. The NE/EQ comparisons are also fine with this choice. */
328 c64->cond = c32.cond;
329 c64->value = tcg_temp_new_i64();
330 tcg_gen_ext_i32_i64(c64->value, c32.value);
331
332 arm_free_cc(&c32);
333 }
334
335 static void a64_free_cc(DisasCompare64 *c64)
336 {
337 tcg_temp_free_i64(c64->value);
338 }
339
340 static void gen_exception_internal(int excp)
341 {
342 TCGv_i32 tcg_excp = tcg_const_i32(excp);
343
344 assert(excp_is_internal(excp));
345 gen_helper_exception_internal(cpu_env, tcg_excp);
346 tcg_temp_free_i32(tcg_excp);
347 }
348
349 static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
350 {
351 gen_a64_set_pc_im(pc);
352 gen_exception_internal(excp);
353 s->base.is_jmp = DISAS_NORETURN;
354 }
355
356 static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
357 uint32_t syndrome, uint32_t target_el)
358 {
359 gen_a64_set_pc_im(pc);
360 gen_exception(excp, syndrome, target_el);
361 s->base.is_jmp = DISAS_NORETURN;
362 }
363
364 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
365 {
366 TCGv_i32 tcg_syn;
367
368 gen_a64_set_pc_im(s->pc_curr);
369 tcg_syn = tcg_const_i32(syndrome);
370 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
371 tcg_temp_free_i32(tcg_syn);
372 s->base.is_jmp = DISAS_NORETURN;
373 }
374
375 static void gen_step_complete_exception(DisasContext *s)
376 {
 377     /* We just completed a step of an insn. Move from Active-not-pending
378 * to Active-pending, and then also take the swstep exception.
379 * This corresponds to making the (IMPDEF) choice to prioritize
380 * swstep exceptions over asynchronous exceptions taken to an exception
381 * level where debug is disabled. This choice has the advantage that
382 * we do not need to maintain internal state corresponding to the
383 * ISV/EX syndrome bits between completion of the step and generation
384 * of the exception, and our syndrome information is always correct.
385 */
386 gen_ss_advance(s);
387 gen_swstep_exception(s, 1, s->is_ldex);
388 s->base.is_jmp = DISAS_NORETURN;
389 }
390
391 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
392 {
393 /* No direct tb linking with singlestep (either QEMU's or the ARM
394 * debug architecture kind) or deterministic io
395 */
396 if (s->base.singlestep_enabled || s->ss_active ||
397 (tb_cflags(s->base.tb) & CF_LAST_IO)) {
398 return false;
399 }
400
401 #ifndef CONFIG_USER_ONLY
402 /* Only link tbs from inside the same guest page */
403 if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
404 return false;
405 }
406 #endif
407
408 return true;
409 }
410
411 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
412 {
413 TranslationBlock *tb;
414
415 tb = s->base.tb;
416 if (use_goto_tb(s, n, dest)) {
417 tcg_gen_goto_tb(n);
418 gen_a64_set_pc_im(dest);
419 tcg_gen_exit_tb(tb, n);
420 s->base.is_jmp = DISAS_NORETURN;
421 } else {
422 gen_a64_set_pc_im(dest);
423 if (s->ss_active) {
424 gen_step_complete_exception(s);
425 } else if (s->base.singlestep_enabled) {
426 gen_exception_internal(EXCP_DEBUG);
427 } else {
428 tcg_gen_lookup_and_goto_ptr();
429 s->base.is_jmp = DISAS_NORETURN;
430 }
431 }
432 }
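/*
 * Note (illustrative): when use_goto_tb() allows it, the goto_tb/exit_tb
 * pair above lets the generated code be patched with a direct jump to the
 * next TB (TB chaining).  Otherwise the PC is written back and we either
 * raise the pending single-step/debug exception or take the slower
 * lookup_and_goto_ptr() path, which looks the next TB up at run time.
 */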
433
434 void unallocated_encoding(DisasContext *s)
435 {
436 /* Unallocated and reserved encodings are uncategorized */
437 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
438 default_exception_el(s));
439 }
440
441 static void init_tmp_a64_array(DisasContext *s)
442 {
443 #ifdef CONFIG_DEBUG_TCG
444 memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
445 #endif
446 s->tmp_a64_count = 0;
447 }
448
449 static void free_tmp_a64(DisasContext *s)
450 {
451 int i;
452 for (i = 0; i < s->tmp_a64_count; i++) {
453 tcg_temp_free_i64(s->tmp_a64[i]);
454 }
455 init_tmp_a64_array(s);
456 }
457
458 TCGv_i64 new_tmp_a64(DisasContext *s)
459 {
460 assert(s->tmp_a64_count < TMP_A64_MAX);
461 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
462 }
463
464 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
465 {
466 TCGv_i64 t = new_tmp_a64(s);
467 tcg_gen_movi_i64(t, 0);
468 return t;
469 }
470
471 /*
472 * Register access functions
473 *
 474  * These functions are used for directly accessing a register where
475 * changes to the final register value are likely to be made. If you
476 * need to use a register for temporary calculation (e.g. index type
477 * operations) use the read_* form.
478 *
479 * B1.2.1 Register mappings
480 *
481 * In instruction register encoding 31 can refer to ZR (zero register) or
482 * the SP (stack pointer) depending on context. In QEMU's case we map SP
483 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
484 * This is the point of the _sp forms.
485 */
486 TCGv_i64 cpu_reg(DisasContext *s, int reg)
487 {
488 if (reg == 31) {
489 return new_tmp_a64_zero(s);
490 } else {
491 return cpu_X[reg];
492 }
493 }
494
495 /* register access for when 31 == SP */
496 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
497 {
498 return cpu_X[reg];
499 }
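/*
 * Illustrative sketch (not part of the original source): register number 31
 * resolves differently depending on which accessor the instruction's
 * semantics call for.
 */
#if 0
    TCGv_i64 xzr = cpu_reg(s, 31);    /* fresh temporary that reads as zero */
    TCGv_i64 sp  = cpu_reg_sp(s, 31); /* the live stack pointer global      */
#endif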
500
501 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
502 * representing the register contents. This TCGv is an auto-freed
503 * temporary so it need not be explicitly freed, and may be modified.
504 */
505 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
506 {
507 TCGv_i64 v = new_tmp_a64(s);
508 if (reg != 31) {
509 if (sf) {
510 tcg_gen_mov_i64(v, cpu_X[reg]);
511 } else {
512 tcg_gen_ext32u_i64(v, cpu_X[reg]);
513 }
514 } else {
515 tcg_gen_movi_i64(v, 0);
516 }
517 return v;
518 }
519
520 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
521 {
522 TCGv_i64 v = new_tmp_a64(s);
523 if (sf) {
524 tcg_gen_mov_i64(v, cpu_X[reg]);
525 } else {
526 tcg_gen_ext32u_i64(v, cpu_X[reg]);
527 }
528 return v;
529 }
530
531 /* Return the offset into CPUARMState of a slice (from
532 * the least significant end) of FP register Qn (ie
533 * Dn, Sn, Hn or Bn).
534 * (Note that this is not the same mapping as for A32; see cpu.h)
535 */
536 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
537 {
538 return vec_reg_offset(s, regno, 0, size);
539 }
540
541 /* Offset of the high half of the 128 bit vector Qn */
542 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
543 {
544 return vec_reg_offset(s, regno, 1, MO_64);
545 }
546
547 /* Convenience accessors for reading and writing single and double
548 * FP registers. Writing clears the upper parts of the associated
549 * 128 bit vector register, as required by the architecture.
550 * Note that unlike the GP register accessors, the values returned
551 * by the read functions must be manually freed.
552 */
553 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
554 {
555 TCGv_i64 v = tcg_temp_new_i64();
556
557 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
558 return v;
559 }
560
561 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
562 {
563 TCGv_i32 v = tcg_temp_new_i32();
564
565 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
566 return v;
567 }
568
569 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
570 {
571 TCGv_i32 v = tcg_temp_new_i32();
572
573 tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
574 return v;
575 }
576
577 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
578 * If SVE is not enabled, then there are only 128 bits in the vector.
579 */
580 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
581 {
582 unsigned ofs = fp_reg_offset(s, rd, MO_64);
583 unsigned vsz = vec_full_reg_size(s);
584
585 /* Nop move, with side effect of clearing the tail. */
586 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
587 }
588
589 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
590 {
591 unsigned ofs = fp_reg_offset(s, reg, MO_64);
592
593 tcg_gen_st_i64(v, cpu_env, ofs);
594 clear_vec_high(s, false, reg);
595 }
596
597 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
598 {
599 TCGv_i64 tmp = tcg_temp_new_i64();
600
601 tcg_gen_extu_i32_i64(tmp, v);
602 write_fp_dreg(s, reg, tmp);
603 tcg_temp_free_i64(tmp);
604 }
605
606 TCGv_ptr get_fpstatus_ptr(bool is_f16)
607 {
608 TCGv_ptr statusptr = tcg_temp_new_ptr();
609 int offset;
610
611 /* In A64 all instructions (both FP and Neon) use the FPCR; there
612 * is no equivalent of the A32 Neon "standard FPSCR value".
613 * However half-precision operations operate under a different
614 * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
615 */
616 if (is_f16) {
617 offset = offsetof(CPUARMState, vfp.fp_status_f16);
618 } else {
619 offset = offsetof(CPUARMState, vfp.fp_status);
620 }
621 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
622 return statusptr;
623 }
624
625 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
626 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
627 GVecGen2Fn *gvec_fn, int vece)
628 {
629 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
630 is_q ? 16 : 8, vec_full_reg_size(s));
631 }
632
633 /* Expand a 2-operand + immediate AdvSIMD vector operation using
634 * an expander function.
635 */
636 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
637 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
638 {
639 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
640 imm, is_q ? 16 : 8, vec_full_reg_size(s));
641 }
642
643 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
644 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
645 GVecGen3Fn *gvec_fn, int vece)
646 {
647 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
648 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
649 }
650
651 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
652 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
653 int rx, GVecGen4Fn *gvec_fn, int vece)
654 {
655 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
656 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
657 is_q ? 16 : 8, vec_full_reg_size(s));
658 }
659
660 /* Expand a 2-operand operation using an out-of-line helper. */
661 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
662 int rn, int data, gen_helper_gvec_2 *fn)
663 {
664 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
665 vec_full_reg_offset(s, rn),
666 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
667 }
668
669 /* Expand a 3-operand operation using an out-of-line helper. */
670 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
671 int rn, int rm, int data, gen_helper_gvec_3 *fn)
672 {
673 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
674 vec_full_reg_offset(s, rn),
675 vec_full_reg_offset(s, rm),
676 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
677 }
678
679 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
680 * an out-of-line helper.
681 */
682 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
683 int rm, bool is_fp16, int data,
684 gen_helper_gvec_3_ptr *fn)
685 {
686 TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
687 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
688 vec_full_reg_offset(s, rn),
689 vec_full_reg_offset(s, rm), fpst,
690 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
691 tcg_temp_free_ptr(fpst);
692 }
693
694 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
695 * than the 32 bit equivalent.
696 */
697 static inline void gen_set_NZ64(TCGv_i64 result)
698 {
699 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
700 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
701 }
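/*
 * Note (illustrative): this relies on the usual QEMU ARM flag convention --
 * N is bit 31 of cpu_NF and Z is set iff cpu_ZF == 0.  tcg_gen_extr_i64_i32()
 * splits the 64-bit result into ZF (low half) and NF (high half), so bit 31
 * of NF is already bit 63 of the result, and OR-ing the two halves into ZF
 * leaves ZF == 0 exactly when the whole 64-bit result is zero.
 */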
702
703 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
704 static inline void gen_logic_CC(int sf, TCGv_i64 result)
705 {
706 if (sf) {
707 gen_set_NZ64(result);
708 } else {
709 tcg_gen_extrl_i64_i32(cpu_ZF, result);
710 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
711 }
712 tcg_gen_movi_i32(cpu_CF, 0);
713 tcg_gen_movi_i32(cpu_VF, 0);
714 }
715
716 /* dest = T0 + T1; compute C, N, V and Z flags */
717 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
718 {
719 if (sf) {
720 TCGv_i64 result, flag, tmp;
721 result = tcg_temp_new_i64();
722 flag = tcg_temp_new_i64();
723 tmp = tcg_temp_new_i64();
724
725 tcg_gen_movi_i64(tmp, 0);
726 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
727
728 tcg_gen_extrl_i64_i32(cpu_CF, flag);
729
730 gen_set_NZ64(result);
731
732 tcg_gen_xor_i64(flag, result, t0);
733 tcg_gen_xor_i64(tmp, t0, t1);
734 tcg_gen_andc_i64(flag, flag, tmp);
735 tcg_temp_free_i64(tmp);
736 tcg_gen_extrh_i64_i32(cpu_VF, flag);
737
738 tcg_gen_mov_i64(dest, result);
739 tcg_temp_free_i64(result);
740 tcg_temp_free_i64(flag);
741 } else {
742 /* 32 bit arithmetic */
743 TCGv_i32 t0_32 = tcg_temp_new_i32();
744 TCGv_i32 t1_32 = tcg_temp_new_i32();
745 TCGv_i32 tmp = tcg_temp_new_i32();
746
747 tcg_gen_movi_i32(tmp, 0);
748 tcg_gen_extrl_i64_i32(t0_32, t0);
749 tcg_gen_extrl_i64_i32(t1_32, t1);
750 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
751 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
752 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
753 tcg_gen_xor_i32(tmp, t0_32, t1_32);
754 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
755 tcg_gen_extu_i32_i64(dest, cpu_NF);
756
757 tcg_temp_free_i32(tmp);
758 tcg_temp_free_i32(t0_32);
759 tcg_temp_free_i32(t1_32);
760 }
761 }
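/*
 * Worked example (illustrative): signed overflow on addition occurs when
 * both operands share a sign and the result's sign differs, hence
 * VF = (result ^ t0) & ~(t0 ^ t1) with the flag taken from the top bit.
 * For sf == 1, t0 = 0x7fffffffffffffff and t1 = 1 give
 * result = 0x8000000000000000; (result ^ t0) has bit 63 set while
 * (t0 ^ t1) has it clear, so the andc leaves bit 63 set and V is raised.
 */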
762
763 /* dest = T0 - T1; compute C, N, V and Z flags */
764 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
765 {
766 if (sf) {
767 /* 64 bit arithmetic */
768 TCGv_i64 result, flag, tmp;
769
770 result = tcg_temp_new_i64();
771 flag = tcg_temp_new_i64();
772 tcg_gen_sub_i64(result, t0, t1);
773
774 gen_set_NZ64(result);
775
776 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
777 tcg_gen_extrl_i64_i32(cpu_CF, flag);
778
779 tcg_gen_xor_i64(flag, result, t0);
780 tmp = tcg_temp_new_i64();
781 tcg_gen_xor_i64(tmp, t0, t1);
782 tcg_gen_and_i64(flag, flag, tmp);
783 tcg_temp_free_i64(tmp);
784 tcg_gen_extrh_i64_i32(cpu_VF, flag);
785 tcg_gen_mov_i64(dest, result);
786 tcg_temp_free_i64(flag);
787 tcg_temp_free_i64(result);
788 } else {
789 /* 32 bit arithmetic */
790 TCGv_i32 t0_32 = tcg_temp_new_i32();
791 TCGv_i32 t1_32 = tcg_temp_new_i32();
792 TCGv_i32 tmp;
793
794 tcg_gen_extrl_i64_i32(t0_32, t0);
795 tcg_gen_extrl_i64_i32(t1_32, t1);
796 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
797 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
798 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
799 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
800 tmp = tcg_temp_new_i32();
801 tcg_gen_xor_i32(tmp, t0_32, t1_32);
802 tcg_temp_free_i32(t0_32);
803 tcg_temp_free_i32(t1_32);
804 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
805 tcg_temp_free_i32(tmp);
806 tcg_gen_extu_i32_i64(dest, cpu_NF);
807 }
808 }
809
810 /* dest = T0 + T1 + CF; do not compute flags. */
811 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
812 {
813 TCGv_i64 flag = tcg_temp_new_i64();
814 tcg_gen_extu_i32_i64(flag, cpu_CF);
815 tcg_gen_add_i64(dest, t0, t1);
816 tcg_gen_add_i64(dest, dest, flag);
817 tcg_temp_free_i64(flag);
818
819 if (!sf) {
820 tcg_gen_ext32u_i64(dest, dest);
821 }
822 }
823
824 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
825 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
826 {
827 if (sf) {
828 TCGv_i64 result, cf_64, vf_64, tmp;
829 result = tcg_temp_new_i64();
830 cf_64 = tcg_temp_new_i64();
831 vf_64 = tcg_temp_new_i64();
832 tmp = tcg_const_i64(0);
833
834 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
835 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
836 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
837 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
838 gen_set_NZ64(result);
839
840 tcg_gen_xor_i64(vf_64, result, t0);
841 tcg_gen_xor_i64(tmp, t0, t1);
842 tcg_gen_andc_i64(vf_64, vf_64, tmp);
843 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
844
845 tcg_gen_mov_i64(dest, result);
846
847 tcg_temp_free_i64(tmp);
848 tcg_temp_free_i64(vf_64);
849 tcg_temp_free_i64(cf_64);
850 tcg_temp_free_i64(result);
851 } else {
852 TCGv_i32 t0_32, t1_32, tmp;
853 t0_32 = tcg_temp_new_i32();
854 t1_32 = tcg_temp_new_i32();
855 tmp = tcg_const_i32(0);
856
857 tcg_gen_extrl_i64_i32(t0_32, t0);
858 tcg_gen_extrl_i64_i32(t1_32, t1);
859 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
860 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
861
862 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
863 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
864 tcg_gen_xor_i32(tmp, t0_32, t1_32);
865 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
866 tcg_gen_extu_i32_i64(dest, cpu_NF);
867
868 tcg_temp_free_i32(tmp);
869 tcg_temp_free_i32(t1_32);
870 tcg_temp_free_i32(t0_32);
871 }
872 }
873
874 /*
875 * Load/Store generators
876 */
877
878 /*
879 * Store from GPR register to memory.
880 */
881 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
882 TCGv_i64 tcg_addr, int size, int memidx,
883 bool iss_valid,
884 unsigned int iss_srt,
885 bool iss_sf, bool iss_ar)
886 {
887 g_assert(size <= 3);
888 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
889
890 if (iss_valid) {
891 uint32_t syn;
892
893 syn = syn_data_abort_with_iss(0,
894 size,
895 false,
896 iss_srt,
897 iss_sf,
898 iss_ar,
899 0, 0, 0, 0, 0, false);
900 disas_set_insn_syndrome(s, syn);
901 }
902 }
903
904 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
905 TCGv_i64 tcg_addr, int size,
906 bool iss_valid,
907 unsigned int iss_srt,
908 bool iss_sf, bool iss_ar)
909 {
910 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
911 iss_valid, iss_srt, iss_sf, iss_ar);
912 }
913
914 /*
915 * Load from memory to GPR register
916 */
917 static void do_gpr_ld_memidx(DisasContext *s,
918 TCGv_i64 dest, TCGv_i64 tcg_addr,
919 int size, bool is_signed,
920 bool extend, int memidx,
921 bool iss_valid, unsigned int iss_srt,
922 bool iss_sf, bool iss_ar)
923 {
924 MemOp memop = s->be_data + size;
925
926 g_assert(size <= 3);
927
928 if (is_signed) {
929 memop += MO_SIGN;
930 }
931
932 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
933
934 if (extend && is_signed) {
935 g_assert(size < 3);
936 tcg_gen_ext32u_i64(dest, dest);
937 }
938
939 if (iss_valid) {
940 uint32_t syn;
941
942 syn = syn_data_abort_with_iss(0,
943 size,
944 is_signed,
945 iss_srt,
946 iss_sf,
947 iss_ar,
948 0, 0, 0, 0, 0, false);
949 disas_set_insn_syndrome(s, syn);
950 }
951 }
952
953 static void do_gpr_ld(DisasContext *s,
954 TCGv_i64 dest, TCGv_i64 tcg_addr,
955 int size, bool is_signed, bool extend,
956 bool iss_valid, unsigned int iss_srt,
957 bool iss_sf, bool iss_ar)
958 {
959 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
960 get_mem_index(s),
961 iss_valid, iss_srt, iss_sf, iss_ar);
962 }
963
964 /*
965 * Store from FP register to memory
966 */
967 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
968 {
969 /* This writes the bottom N bits of a 128 bit wide vector to memory */
970 TCGv_i64 tmp = tcg_temp_new_i64();
971 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
972 if (size < 4) {
973 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
974 s->be_data + size);
975 } else {
976 bool be = s->be_data == MO_BE;
977 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
978
979 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
980 tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
981 s->be_data | MO_Q);
982 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
983 tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
984 s->be_data | MO_Q);
985 tcg_temp_free_i64(tcg_hiaddr);
986 }
987
988 tcg_temp_free_i64(tmp);
989 }
990
991 /*
992 * Load from memory to FP register
993 */
994 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
995 {
996 /* This always zero-extends and writes to a full 128 bit wide vector */
997 TCGv_i64 tmplo = tcg_temp_new_i64();
998 TCGv_i64 tmphi = NULL;
999
1000 if (size < 4) {
1001 MemOp memop = s->be_data + size;
1002 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
1003 } else {
1004 bool be = s->be_data == MO_BE;
1005 TCGv_i64 tcg_hiaddr;
1006
1007 tmphi = tcg_temp_new_i64();
1008 tcg_hiaddr = tcg_temp_new_i64();
1009
1010 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1011 tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1012 s->be_data | MO_Q);
1013 tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1014 s->be_data | MO_Q);
1015 tcg_temp_free_i64(tcg_hiaddr);
1016 }
1017
1018 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1019 tcg_temp_free_i64(tmplo);
1020
1021 if (tmphi) {
1022 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1023 tcg_temp_free_i64(tmphi);
1024 }
1025 clear_vec_high(s, tmphi != NULL, destidx);
1026 }
1027
1028 /*
1029 * Vector load/store helpers.
1030 *
1031 * The principal difference between this and a FP load is that we don't
1032 * zero extend as we are filling a partial chunk of the vector register.
1033 * These functions don't support 128 bit loads/stores, which would be
1034 * normal load/store operations.
1035 *
1036 * The _i32 versions are useful when operating on 32 bit quantities
1037 * (eg for floating point single or using Neon helper functions).
1038 */
1039
1040 /* Get value of an element within a vector register */
1041 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1042 int element, MemOp memop)
1043 {
1044 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1045 switch (memop) {
1046 case MO_8:
1047 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1048 break;
1049 case MO_16:
1050 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1051 break;
1052 case MO_32:
1053 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1054 break;
1055 case MO_8|MO_SIGN:
1056 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1057 break;
1058 case MO_16|MO_SIGN:
1059 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1060 break;
1061 case MO_32|MO_SIGN:
1062 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1063 break;
1064 case MO_64:
1065 case MO_64|MO_SIGN:
1066 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1067 break;
1068 default:
1069 g_assert_not_reached();
1070 }
1071 }
1072
1073 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1074 int element, MemOp memop)
1075 {
1076 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1077 switch (memop) {
1078 case MO_8:
1079 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1080 break;
1081 case MO_16:
1082 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1083 break;
1084 case MO_8|MO_SIGN:
1085 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1086 break;
1087 case MO_16|MO_SIGN:
1088 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1089 break;
1090 case MO_32:
1091 case MO_32|MO_SIGN:
1092 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1093 break;
1094 default:
1095 g_assert_not_reached();
1096 }
1097 }
1098
1099 /* Set value of an element within a vector register */
1100 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1101 int element, MemOp memop)
1102 {
1103 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1104 switch (memop) {
1105 case MO_8:
1106 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1107 break;
1108 case MO_16:
1109 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1110 break;
1111 case MO_32:
1112 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1113 break;
1114 case MO_64:
1115 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1116 break;
1117 default:
1118 g_assert_not_reached();
1119 }
1120 }
1121
1122 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1123 int destidx, int element, MemOp memop)
1124 {
1125 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1126 switch (memop) {
1127 case MO_8:
1128 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1129 break;
1130 case MO_16:
1131 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1132 break;
1133 case MO_32:
1134 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1135 break;
1136 default:
1137 g_assert_not_reached();
1138 }
1139 }
1140
1141 /* Store from vector register to memory */
1142 static void do_vec_st(DisasContext *s, int srcidx, int element,
1143 TCGv_i64 tcg_addr, int size, MemOp endian)
1144 {
1145 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1146
1147 read_vec_element(s, tcg_tmp, srcidx, element, size);
1148 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1149
1150 tcg_temp_free_i64(tcg_tmp);
1151 }
1152
1153 /* Load from memory to vector register */
1154 static void do_vec_ld(DisasContext *s, int destidx, int element,
1155 TCGv_i64 tcg_addr, int size, MemOp endian)
1156 {
1157 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1158
1159 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1160 write_vec_element(s, tcg_tmp, destidx, element, size);
1161
1162 tcg_temp_free_i64(tcg_tmp);
1163 }
1164
1165 /* Check that FP/Neon access is enabled. If it is, return
1166 * true. If not, emit code to generate an appropriate exception,
1167 * and return false; the caller should not emit any code for
1168 * the instruction. Note that this check must happen after all
1169 * unallocated-encoding checks (otherwise the syndrome information
1170 * for the resulting exception will be incorrect).
1171 */
1172 static inline bool fp_access_check(DisasContext *s)
1173 {
1174 assert(!s->fp_access_checked);
1175 s->fp_access_checked = true;
1176
1177 if (!s->fp_excp_el) {
1178 return true;
1179 }
1180
1181 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1182 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1183 return false;
1184 }
1185
1186 /* Check that SVE access is enabled. If it is, return true.
1187 * If not, emit code to generate an appropriate exception and return false.
1188 */
1189 bool sve_access_check(DisasContext *s)
1190 {
1191 if (s->sve_excp_el) {
1192 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
1193 s->sve_excp_el);
1194 return false;
1195 }
1196 return fp_access_check(s);
1197 }
1198
1199 /*
1200 * This utility function is for doing register extension with an
1201 * optional shift. You will likely want to pass a temporary for the
1202 * destination register. See DecodeRegExtend() in the ARM ARM.
1203 */
1204 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1205 int option, unsigned int shift)
1206 {
1207 int extsize = extract32(option, 0, 2);
1208 bool is_signed = extract32(option, 2, 1);
1209
1210 if (is_signed) {
1211 switch (extsize) {
1212 case 0:
1213 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1214 break;
1215 case 1:
1216 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1217 break;
1218 case 2:
1219 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1220 break;
1221 case 3:
1222 tcg_gen_mov_i64(tcg_out, tcg_in);
1223 break;
1224 }
1225 } else {
1226 switch (extsize) {
1227 case 0:
1228 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1229 break;
1230 case 1:
1231 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1232 break;
1233 case 2:
1234 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1235 break;
1236 case 3:
1237 tcg_gen_mov_i64(tcg_out, tcg_in);
1238 break;
1239 }
1240 }
1241
1242 if (shift) {
1243 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1244 }
1245 }
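/*
 * Worked example (illustrative): option is the standard 3-bit extend
 * encoding -- bit 2 selects signed, bits 1:0 select byte/halfword/word/
 * doubleword.  So option == 0b110 (SXTW) with shift == 2 sign-extends the
 * low 32 bits of tcg_in and then shifts left by 2, as used by e.g.
 * "ADD Xd, Xn, Wm, SXTW #2".
 */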
1246
1247 static inline void gen_check_sp_alignment(DisasContext *s)
1248 {
1249 /* The AArch64 architecture mandates that (if enabled via PSTATE
1250 * or SCTLR bits) there is a check that SP is 16-aligned on every
1251 * SP-relative load or store (with an exception generated if it is not).
1252 * In line with general QEMU practice regarding misaligned accesses,
1253 * we omit these checks for the sake of guest program performance.
1254 * This function is provided as a hook so we can more easily add these
1255 * checks in future (possibly as a "favour catching guest program bugs
1256 * over speed" user selectable option).
1257 */
1258 }
1259
1260 /*
1261 * This provides a simple table based table lookup decoder. It is
1262 * intended to be used when the relevant bits for decode are too
1263 * awkwardly placed and switch/if based logic would be confusing and
1264 * deeply nested. Since it's a linear search through the table, tables
1265 * should be kept small.
1266 *
1267 * It returns the first handler where insn & mask == pattern, or
1268 * NULL if there is no match.
1269 * The table is terminated by an empty mask (i.e. 0)
1270 */
1271 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1272 uint32_t insn)
1273 {
1274 const AArch64DecodeTable *tptr = table;
1275
1276 while (tptr->mask) {
1277 if ((insn & tptr->mask) == tptr->pattern) {
1278 return tptr->disas_fn;
1279 }
1280 tptr++;
1281 }
1282 return NULL;
1283 }
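/*
 * Illustrative sketch (not part of the original source) of building and
 * using a decode table; the pattern/mask values and the handler name are
 * invented for the example and do not correspond to a real encoding group.
 */
#if 0
static void disas_example_group(DisasContext *s, uint32_t insn);

static const AArch64DecodeTable example_table[] = {
    /* pattern      mask         handler */
    { 0x0e000400, 0x9f200400, disas_example_group },
    { 0x00000000, 0x00000000, NULL } /* zero mask terminates the table */
};

static void disas_example(DisasContext *s, uint32_t insn)
{
    AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);

    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
#endif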
1284
1285 /*
1286 * The instruction disassembly implemented here matches
1287 * the instruction encoding classifications in chapter C4
1288 * of the ARM Architecture Reference Manual (DDI0487B_a);
1289 * classification names and decode diagrams here should generally
1290 * match up with those in the manual.
1291 */
1292
1293 /* Unconditional branch (immediate)
 1294  *   31  30       26 25                                  0
 1295  * +----+-----------+-------------------------------------+
 1296  * | op | 0 0 1 0 1 |                imm26                |
1297 * +----+-----------+-------------------------------------+
1298 */
1299 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1300 {
1301 uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1302
1303 if (insn & (1U << 31)) {
1304 /* BL Branch with link */
1305 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1306 }
1307
1308 /* B Branch / BL Branch with link */
1309 reset_btype(s);
1310 gen_goto_tb(s, 0, addr);
1311 }
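/*
 * Worked example (illustrative): insn 0x14000002 is "B .+8" -- bit 31 is
 * clear, so no link register write, and sextract32(insn, 0, 26) == 2 gives
 * addr = pc_curr + 8.  The same offset with bit 31 set (0x94000002) is
 * "BL .+8" and additionally writes the return address (the next insn) to X30.
 */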
1312
1313 /* Compare and branch (immediate)
 1314  *   31  30         25 24   23                  5 4      0
 1315  * +----+-------------+----+---------------------+--------+
 1316  * | sf | 0 1 1 0 1 0 | op |        imm19        |   Rt   |
1317 * +----+-------------+----+---------------------+--------+
1318 */
1319 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1320 {
1321 unsigned int sf, op, rt;
1322 uint64_t addr;
1323 TCGLabel *label_match;
1324 TCGv_i64 tcg_cmp;
1325
1326 sf = extract32(insn, 31, 1);
1327 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1328 rt = extract32(insn, 0, 5);
1329 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1330
1331 tcg_cmp = read_cpu_reg(s, rt, sf);
1332 label_match = gen_new_label();
1333
1334 reset_btype(s);
1335 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1336 tcg_cmp, 0, label_match);
1337
1338 gen_goto_tb(s, 0, s->base.pc_next);
1339 gen_set_label(label_match);
1340 gen_goto_tb(s, 1, addr);
1341 }
1342
1343 /* Test and branch (immediate)
 1344  *   31  30         25 24   23   19 18         5 4    0
 1345  * +----+-------------+----+-------+-------------+------+
 1346  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1347 * +----+-------------+----+-------+-------------+------+
1348 */
1349 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1350 {
1351 unsigned int bit_pos, op, rt;
1352 uint64_t addr;
1353 TCGLabel *label_match;
1354 TCGv_i64 tcg_cmp;
1355
1356 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1357 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1358 addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1359 rt = extract32(insn, 0, 5);
1360
1361 tcg_cmp = tcg_temp_new_i64();
1362 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1363 label_match = gen_new_label();
1364
1365 reset_btype(s);
1366 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1367 tcg_cmp, 0, label_match);
1368 tcg_temp_free_i64(tcg_cmp);
1369 gen_goto_tb(s, 0, s->base.pc_next);
1370 gen_set_label(label_match);
1371 gen_goto_tb(s, 1, addr);
1372 }
1373
1374 /* Conditional branch (immediate)
 1375  *   31           25 24   23                  5  4  3    0
 1376  * +---------------+----+---------------------+----+------+
 1377  * | 0 1 0 1 0 1 0 | o1 |        imm19        | o0 | cond |
1378 * +---------------+----+---------------------+----+------+
1379 */
1380 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1381 {
1382 unsigned int cond;
1383 uint64_t addr;
1384
1385 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1386 unallocated_encoding(s);
1387 return;
1388 }
1389 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1390 cond = extract32(insn, 0, 4);
1391
1392 reset_btype(s);
1393 if (cond < 0x0e) {
1394 /* genuinely conditional branches */
1395 TCGLabel *label_match = gen_new_label();
1396 arm_gen_test_cc(cond, label_match);
1397 gen_goto_tb(s, 0, s->base.pc_next);
1398 gen_set_label(label_match);
1399 gen_goto_tb(s, 1, addr);
1400 } else {
1401 /* 0xe and 0xf are both "always" conditions */
1402 gen_goto_tb(s, 0, addr);
1403 }
1404 }
1405
1406 /* HINT instruction group, including various allocated HINTs */
1407 static void handle_hint(DisasContext *s, uint32_t insn,
1408 unsigned int op1, unsigned int op2, unsigned int crm)
1409 {
1410 unsigned int selector = crm << 3 | op2;
1411
1412 if (op1 != 3) {
1413 unallocated_encoding(s);
1414 return;
1415 }
1416
1417 switch (selector) {
1418 case 0b00000: /* NOP */
1419 break;
1420 case 0b00011: /* WFI */
1421 s->base.is_jmp = DISAS_WFI;
1422 break;
1423 case 0b00001: /* YIELD */
1424 /* When running in MTTCG we don't generate jumps to the yield and
1425 * WFE helpers as it won't affect the scheduling of other vCPUs.
1426 * If we wanted to more completely model WFE/SEV so we don't busy
1427 * spin unnecessarily we would need to do something more involved.
1428 */
1429 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1430 s->base.is_jmp = DISAS_YIELD;
1431 }
1432 break;
1433 case 0b00010: /* WFE */
1434 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1435 s->base.is_jmp = DISAS_WFE;
1436 }
1437 break;
1438 case 0b00100: /* SEV */
1439 case 0b00101: /* SEVL */
1440 /* we treat all as NOP at least for now */
1441 break;
1442 case 0b00111: /* XPACLRI */
1443 if (s->pauth_active) {
1444 gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1445 }
1446 break;
1447 case 0b01000: /* PACIA1716 */
1448 if (s->pauth_active) {
1449 gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1450 }
1451 break;
1452 case 0b01010: /* PACIB1716 */
1453 if (s->pauth_active) {
1454 gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1455 }
1456 break;
1457 case 0b01100: /* AUTIA1716 */
1458 if (s->pauth_active) {
1459 gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1460 }
1461 break;
1462 case 0b01110: /* AUTIB1716 */
1463 if (s->pauth_active) {
1464 gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1465 }
1466 break;
1467 case 0b11000: /* PACIAZ */
1468 if (s->pauth_active) {
1469 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1470 new_tmp_a64_zero(s));
1471 }
1472 break;
1473 case 0b11001: /* PACIASP */
1474 if (s->pauth_active) {
1475 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1476 }
1477 break;
1478 case 0b11010: /* PACIBZ */
1479 if (s->pauth_active) {
1480 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1481 new_tmp_a64_zero(s));
1482 }
1483 break;
1484 case 0b11011: /* PACIBSP */
1485 if (s->pauth_active) {
1486 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1487 }
1488 break;
1489 case 0b11100: /* AUTIAZ */
1490 if (s->pauth_active) {
1491 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1492 new_tmp_a64_zero(s));
1493 }
1494 break;
1495 case 0b11101: /* AUTIASP */
1496 if (s->pauth_active) {
1497 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1498 }
1499 break;
1500 case 0b11110: /* AUTIBZ */
1501 if (s->pauth_active) {
1502 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1503 new_tmp_a64_zero(s));
1504 }
1505 break;
1506 case 0b11111: /* AUTIBSP */
1507 if (s->pauth_active) {
1508 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1509 }
1510 break;
1511 default:
1512 /* default specified as NOP equivalent */
1513 break;
1514 }
1515 }
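/*
 * Worked example (illustrative): the selector is CRm:op2, i.e. the
 * architectural HINT immediate.  PACIASP, for instance, is HINT #25
 * (CRm = 0b0011, op2 = 0b001), so selector == 0b11001 and the case above
 * signs X30 using SP as the modifier whenever pauth is active.
 */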
1516
1517 static void gen_clrex(DisasContext *s, uint32_t insn)
1518 {
1519 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1520 }
1521
1522 /* CLREX, DSB, DMB, ISB */
1523 static void handle_sync(DisasContext *s, uint32_t insn,
1524 unsigned int op1, unsigned int op2, unsigned int crm)
1525 {
1526 TCGBar bar;
1527
1528 if (op1 != 3) {
1529 unallocated_encoding(s);
1530 return;
1531 }
1532
1533 switch (op2) {
1534 case 2: /* CLREX */
1535 gen_clrex(s, insn);
1536 return;
1537 case 4: /* DSB */
1538 case 5: /* DMB */
1539 switch (crm & 3) {
1540 case 1: /* MBReqTypes_Reads */
1541 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1542 break;
1543 case 2: /* MBReqTypes_Writes */
1544 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1545 break;
1546 default: /* MBReqTypes_All */
1547 bar = TCG_BAR_SC | TCG_MO_ALL;
1548 break;
1549 }
1550 tcg_gen_mb(bar);
1551 return;
1552 case 6: /* ISB */
1553 /* We need to break the TB after this insn to execute
 1554          * self-modifying code correctly and also to take
1555 * any pending interrupts immediately.
1556 */
1557 reset_btype(s);
1558 gen_goto_tb(s, 0, s->base.pc_next);
1559 return;
1560
1561 case 7: /* SB */
1562 if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1563 goto do_unallocated;
1564 }
1565 /*
1566 * TODO: There is no speculation barrier opcode for TCG;
1567 * MB and end the TB instead.
1568 */
1569 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1570 gen_goto_tb(s, 0, s->base.pc_next);
1571 return;
1572
1573 default:
1574 do_unallocated:
1575 unallocated_encoding(s);
1576 return;
1577 }
1578 }
1579
1580 static void gen_xaflag(void)
1581 {
1582 TCGv_i32 z = tcg_temp_new_i32();
1583
1584 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1585
1586 /*
1587 * (!C & !Z) << 31
1588 * (!(C | Z)) << 31
1589 * ~((C | Z) << 31)
1590 * ~-(C | Z)
1591 * (C | Z) - 1
1592 */
1593 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1594 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1595
1596 /* !(Z & C) */
1597 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1598 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1599
1600 /* (!C & Z) << 31 -> -(Z & ~C) */
1601 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1602 tcg_gen_neg_i32(cpu_VF, cpu_VF);
1603
1604 /* C | Z */
1605 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1606
1607 tcg_temp_free_i32(z);
1608 }
1609
1610 static void gen_axflag(void)
1611 {
1612 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
1613 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
1614
1615 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1616 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1617
1618 tcg_gen_movi_i32(cpu_NF, 0);
1619 tcg_gen_movi_i32(cpu_VF, 0);
1620 }
1621
1622 /* MSR (immediate) - move immediate to processor state field */
1623 static void handle_msr_i(DisasContext *s, uint32_t insn,
1624 unsigned int op1, unsigned int op2, unsigned int crm)
1625 {
1626 TCGv_i32 t1;
1627 int op = op1 << 3 | op2;
1628
1629 /* End the TB by default, chaining is ok. */
1630 s->base.is_jmp = DISAS_TOO_MANY;
1631
1632 switch (op) {
1633 case 0x00: /* CFINV */
1634 if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1635 goto do_unallocated;
1636 }
1637 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1638 s->base.is_jmp = DISAS_NEXT;
1639 break;
1640
1641 case 0x01: /* XAFlag */
1642 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1643 goto do_unallocated;
1644 }
1645 gen_xaflag();
1646 s->base.is_jmp = DISAS_NEXT;
1647 break;
1648
1649 case 0x02: /* AXFlag */
1650 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1651 goto do_unallocated;
1652 }
1653 gen_axflag();
1654 s->base.is_jmp = DISAS_NEXT;
1655 break;
1656
1657 case 0x03: /* UAO */
1658 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1659 goto do_unallocated;
1660 }
1661 if (crm & 1) {
1662 set_pstate_bits(PSTATE_UAO);
1663 } else {
1664 clear_pstate_bits(PSTATE_UAO);
1665 }
1666 t1 = tcg_const_i32(s->current_el);
1667 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1668 tcg_temp_free_i32(t1);
1669 break;
1670
1671 case 0x04: /* PAN */
1672 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1673 goto do_unallocated;
1674 }
1675 if (crm & 1) {
1676 set_pstate_bits(PSTATE_PAN);
1677 } else {
1678 clear_pstate_bits(PSTATE_PAN);
1679 }
1680 t1 = tcg_const_i32(s->current_el);
1681 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1682 tcg_temp_free_i32(t1);
1683 break;
1684
1685 case 0x05: /* SPSel */
1686 if (s->current_el == 0) {
1687 goto do_unallocated;
1688 }
1689 t1 = tcg_const_i32(crm & PSTATE_SP);
1690 gen_helper_msr_i_spsel(cpu_env, t1);
1691 tcg_temp_free_i32(t1);
1692 break;
1693
1694 case 0x1e: /* DAIFSet */
1695 t1 = tcg_const_i32(crm);
1696 gen_helper_msr_i_daifset(cpu_env, t1);
1697 tcg_temp_free_i32(t1);
1698 break;
1699
1700 case 0x1f: /* DAIFClear */
1701 t1 = tcg_const_i32(crm);
1702 gen_helper_msr_i_daifclear(cpu_env, t1);
1703 tcg_temp_free_i32(t1);
1704 /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
1705 s->base.is_jmp = DISAS_UPDATE_EXIT;
1706 break;
1707
1708 case 0x1c: /* TCO */
1709 if (dc_isar_feature(aa64_mte, s)) {
1710 /* Full MTE is enabled -- set the TCO bit as directed. */
1711 if (crm & 1) {
1712 set_pstate_bits(PSTATE_TCO);
1713 } else {
1714 clear_pstate_bits(PSTATE_TCO);
1715 }
1716 t1 = tcg_const_i32(s->current_el);
1717 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1718 tcg_temp_free_i32(t1);
1719 /* Many factors, including TCO, go into MTE_ACTIVE. */
1720 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1721 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1722 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
1723 s->base.is_jmp = DISAS_NEXT;
1724 } else {
1725 goto do_unallocated;
1726 }
1727 break;
1728
1729 default:
1730 do_unallocated:
1731 unallocated_encoding(s);
1732 return;
1733 }
1734 }
1735
1736 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1737 {
1738 TCGv_i32 tmp = tcg_temp_new_i32();
1739 TCGv_i32 nzcv = tcg_temp_new_i32();
1740
1741 /* build bit 31, N */
1742 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1743 /* build bit 30, Z */
1744 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1745 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1746 /* build bit 29, C */
1747 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1748 /* build bit 28, V */
1749 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1750 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1751 /* generate result */
1752 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1753
1754 tcg_temp_free_i32(nzcv);
1755 tcg_temp_free_i32(tmp);
1756 }
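/*
 * Worked example (illustrative): with N=0, Z=1, C=1, V=0 the code above
 * yields tcg_rt == 0x60000000 -- Z lands in bit 30 and C in bit 29,
 * matching the NZCV layout seen through NZCV-typed system registers.
 */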
1757
1758 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1759 {
1760 TCGv_i32 nzcv = tcg_temp_new_i32();
1761
1762 /* take NZCV from R[t] */
1763 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1764
1765 /* bit 31, N */
1766 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1767 /* bit 30, Z */
1768 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1769 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1770 /* bit 29, C */
1771 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1772 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1773 /* bit 28, V */
1774 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1775 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1776 tcg_temp_free_i32(nzcv);
1777 }
1778
1779 /* MRS - move from system register
1780 * MSR (register) - move to system register
1781 * SYS
1782 * SYSL
1783 * These are all essentially the same insn in 'read' and 'write'
1784 * versions, with varying op0 fields.
1785 */
1786 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1787 unsigned int op0, unsigned int op1, unsigned int op2,
1788 unsigned int crn, unsigned int crm, unsigned int rt)
1789 {
1790 const ARMCPRegInfo *ri;
1791 TCGv_i64 tcg_rt;
1792
1793 ri = get_arm_cp_reginfo(s->cp_regs,
1794 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1795 crn, crm, op0, op1, op2));
1796
1797 if (!ri) {
1798 /* Unknown register; this might be a guest error or a QEMU
1799 * unimplemented feature.
1800 */
1801 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1802 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1803 isread ? "read" : "write", op0, op1, crn, crm, op2);
1804 unallocated_encoding(s);
1805 return;
1806 }
1807
1808 /* Check access permissions */
1809 if (!cp_access_ok(s->current_el, ri, isread)) {
1810 unallocated_encoding(s);
1811 return;
1812 }
1813
1814 if (ri->accessfn) {
1815 /* Emit code to perform further access permissions checks at
1816 * runtime; this may result in an exception.
1817 */
1818 TCGv_ptr tmpptr;
1819 TCGv_i32 tcg_syn, tcg_isread;
1820 uint32_t syndrome;
1821
1822 gen_a64_set_pc_im(s->pc_curr);
1823 tmpptr = tcg_const_ptr(ri);
1824 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1825 tcg_syn = tcg_const_i32(syndrome);
1826 tcg_isread = tcg_const_i32(isread);
1827 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1828 tcg_temp_free_ptr(tmpptr);
1829 tcg_temp_free_i32(tcg_syn);
1830 tcg_temp_free_i32(tcg_isread);
1831 } else if (ri->type & ARM_CP_RAISES_EXC) {
1832 /*
1833 * The readfn or writefn might raise an exception;
1834 * synchronize the CPU state in case it does.
1835 */
1836 gen_a64_set_pc_im(s->pc_curr);
1837 }
1838
1839 /* Handle special cases first */
1840 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1841 case ARM_CP_NOP:
1842 return;
1843 case ARM_CP_NZCV:
1844 tcg_rt = cpu_reg(s, rt);
1845 if (isread) {
1846 gen_get_nzcv(tcg_rt);
1847 } else {
1848 gen_set_nzcv(tcg_rt);
1849 }
1850 return;
1851 case ARM_CP_CURRENTEL:
1852 /* Reads as current EL value from pstate, which is
1853 * guaranteed to be constant by the tb flags.
1854 */
1855 tcg_rt = cpu_reg(s, rt);
1856 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1857 return;
1858 case ARM_CP_DC_ZVA:
1859 /* Writes clear the aligned block of memory which rt points into. */
1860 if (s->mte_active[0]) {
1861 TCGv_i32 t_desc;
1862 int desc = 0;
1863
1864 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
1865 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
1866 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
1867 t_desc = tcg_const_i32(desc);
1868
1869 tcg_rt = new_tmp_a64(s);
1870 gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt));
1871 tcg_temp_free_i32(t_desc);
1872 } else {
1873 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1874 }
1875 gen_helper_dc_zva(cpu_env, tcg_rt);
1876 return;
1877 default:
1878 break;
1879 }
1880 if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1881 return;
1882 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1883 return;
1884 }
1885
1886 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1887 gen_io_start();
1888 }
1889
1890 tcg_rt = cpu_reg(s, rt);
1891
1892 if (isread) {
1893 if (ri->type & ARM_CP_CONST) {
1894 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1895 } else if (ri->readfn) {
1896 TCGv_ptr tmpptr;
1897 tmpptr = tcg_const_ptr(ri);
1898 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1899 tcg_temp_free_ptr(tmpptr);
1900 } else {
1901 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1902 }
1903 } else {
1904 if (ri->type & ARM_CP_CONST) {
1905 /* If not forbidden by access permissions, treat as WI */
1906 return;
1907 } else if (ri->writefn) {
1908 TCGv_ptr tmpptr;
1909 tmpptr = tcg_const_ptr(ri);
1910 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1911 tcg_temp_free_ptr(tmpptr);
1912 } else {
1913 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1914 }
1915 }
1916
1917 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1918 /* I/O operations must end the TB here (whether read or write) */
1919 s->base.is_jmp = DISAS_UPDATE_EXIT;
1920 }
1921 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1922 /*
1923 * A write to any coprocessor register that ends a TB
1924 * must rebuild the hflags for the next TB.
1925 */
1926 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1927 gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1928 tcg_temp_free_i32(tcg_el);
1929 /*
1930 * We default to ending the TB on a coprocessor register write,
1931 * but allow this to be suppressed by the register definition
1932 * (usually only necessary to work around guest bugs).
1933 */
1934 s->base.is_jmp = DISAS_UPDATE_EXIT;
1935 }
1936 }
1937
1938 /* System
1939 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1940 * +---------------------+---+-----+-----+-------+-------+-----+------+
1941 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1942 * +---------------------+---+-----+-----+-------+-------+-----+------+
1943 */
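/*
 * Illustrative decode (an example added here for clarity, not taken from
 * the spec text): MRS x0, CurrentEL encodes L=1, op0=3, op1=0, CRn=4,
 * CRm=2, op2=2, Rt=0, so it falls through to handle_sys() and is
 * satisfied by the ARM_CP_CURRENTEL special case above.
 */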
1944 static void disas_system(DisasContext *s, uint32_t insn)
1945 {
1946 unsigned int l, op0, op1, crn, crm, op2, rt;
1947 l = extract32(insn, 21, 1);
1948 op0 = extract32(insn, 19, 2);
1949 op1 = extract32(insn, 16, 3);
1950 crn = extract32(insn, 12, 4);
1951 crm = extract32(insn, 8, 4);
1952 op2 = extract32(insn, 5, 3);
1953 rt = extract32(insn, 0, 5);
1954
1955 if (op0 == 0) {
1956 if (l || rt != 31) {
1957 unallocated_encoding(s);
1958 return;
1959 }
1960 switch (crn) {
1961 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1962 handle_hint(s, insn, op1, op2, crm);
1963 break;
1964 case 3: /* CLREX, DSB, DMB, ISB */
1965 handle_sync(s, insn, op1, op2, crm);
1966 break;
1967 case 4: /* MSR (immediate) */
1968 handle_msr_i(s, insn, op1, op2, crm);
1969 break;
1970 default:
1971 unallocated_encoding(s);
1972 break;
1973 }
1974 return;
1975 }
1976 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1977 }
1978
1979 /* Exception generation
1980 *
1981 * 31 24 23 21 20 5 4 2 1 0
1982 * +-----------------+-----+------------------------+-----+----+
1983 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1984 * +-----------------+-----+------------------------+-----+----+
1985 */
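/*
 * Worked example (illustrative only): SVC #0 encodes opc=000, imm16=0 and
 * op2:LL=00001, so it is handled by the opc==0 / op2_ll==1 path below.
 */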
1986 static void disas_exc(DisasContext *s, uint32_t insn)
1987 {
1988 int opc = extract32(insn, 21, 3);
1989 int op2_ll = extract32(insn, 0, 5);
1990 int imm16 = extract32(insn, 5, 16);
1991 TCGv_i32 tmp;
1992
1993 switch (opc) {
1994 case 0:
1995 /* For SVC, HVC and SMC we advance the single-step state
1996 * machine before taking the exception. This is architecturally
1997 * mandated, to ensure that single-stepping a system call
1998 * instruction works properly.
1999 */
2000 switch (op2_ll) {
2001 case 1: /* SVC */
2002 gen_ss_advance(s);
2003 gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
2004 syn_aa64_svc(imm16), default_exception_el(s));
2005 break;
2006 case 2: /* HVC */
2007 if (s->current_el == 0) {
2008 unallocated_encoding(s);
2009 break;
2010 }
2011 /* The pre-HVC helper handles the cases where HVC is trapped
2012 * as an undefined insn by runtime configuration.
2013 */
2014 gen_a64_set_pc_im(s->pc_curr);
2015 gen_helper_pre_hvc(cpu_env);
2016 gen_ss_advance(s);
2017 gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
2018 syn_aa64_hvc(imm16), 2);
2019 break;
2020 case 3: /* SMC */
2021 if (s->current_el == 0) {
2022 unallocated_encoding(s);
2023 break;
2024 }
2025 gen_a64_set_pc_im(s->pc_curr);
2026 tmp = tcg_const_i32(syn_aa64_smc(imm16));
2027 gen_helper_pre_smc(cpu_env, tmp);
2028 tcg_temp_free_i32(tmp);
2029 gen_ss_advance(s);
2030 gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
2031 syn_aa64_smc(imm16), 3);
2032 break;
2033 default:
2034 unallocated_encoding(s);
2035 break;
2036 }
2037 break;
2038 case 1:
2039 if (op2_ll != 0) {
2040 unallocated_encoding(s);
2041 break;
2042 }
2043 /* BRK */
2044 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2045 break;
2046 case 2:
2047 if (op2_ll != 0) {
2048 unallocated_encoding(s);
2049 break;
2050 }
2051 /* HLT. This has two purposes.
2052 * Architecturally, it is an external halting debug instruction.
2053 * Since QEMU doesn't implement external debug, we treat this as
2054 * required for the halting-debug-disabled case: it will UNDEF.
2055 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2056 */
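/*
 * For reference (from the Arm semihosting convention, not defined in this
 * file): a guest semihosting call is roughly
 *
 *     mov  w0, #<operation>       // semihosting operation number
 *     adr  x1, <parameter block>  // operation-specific argument
 *     hlt  #0xf000                // trapped here as EXCP_SEMIHOST
 *
 * with the result returned in x0.
 */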
2057 if (semihosting_enabled() && imm16 == 0xf000) {
2058 #ifndef CONFIG_USER_ONLY
2059 /* In system mode, don't allow userspace access to semihosting,
2060 * to provide some semblance of security (and for consistency
2061 * with our 32-bit semihosting).
2062 */
2063 if (s->current_el == 0) {
2064 unsupported_encoding(s, insn);
2065 break;
2066 }
2067 #endif
2068 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
2069 } else {
2070 unsupported_encoding(s, insn);
2071 }
2072 break;
2073 case 5:
2074 if (op2_ll < 1 || op2_ll > 3) {
2075 unallocated_encoding(s);
2076 break;
2077 }
2078 /* DCPS1, DCPS2, DCPS3 */
2079 unsupported_encoding(s, insn);
2080 break;
2081 default:
2082 unallocated_encoding(s);
2083 break;
2084 }
2085 }
2086
2087 /* Unconditional branch (register)
2088 * 31 25 24 21 20 16 15 10 9 5 4 0
2089 * +---------------+-------+-------+-------+------+-------+
2090 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
2091 * +---------------+-------+-------+-------+------+-------+
2092 */
2093 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
2094 {
2095 unsigned int opc, op2, op3, rn, op4;
2096 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */
2097 TCGv_i64 dst;
2098 TCGv_i64 modifier;
2099
2100 opc = extract32(insn, 21, 4);
2101 op2 = extract32(insn, 16, 5);
2102 op3 = extract32(insn, 10, 6);
2103 rn = extract32(insn, 5, 5);
2104 op4 = extract32(insn, 0, 5);
2105
2106 if (op2 != 0x1f) {
2107 goto do_unallocated;
2108 }
2109
2110 switch (opc) {
2111 case 0: /* BR */
2112 case 1: /* BLR */
2113 case 2: /* RET */
2114 btype_mod = opc;
2115 switch (op3) {
2116 case 0:
2117 /* BR, BLR, RET */
2118 if (op4 != 0) {
2119 goto do_unallocated;
2120 }
2121 dst = cpu_reg(s, rn);
2122 break;
2123
2124 case 2:
2125 case 3:
2126 if (!dc_isar_feature(aa64_pauth, s)) {
2127 goto do_unallocated;
2128 }
2129 if (opc == 2) {
2130 /* RETAA, RETAB */
2131 if (rn != 0x1f || op4 != 0x1f) {
2132 goto do_unallocated;
2133 }
2134 rn = 30;
2135 modifier = cpu_X[31];
2136 } else {
2137 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2138 if (op4 != 0x1f) {
2139 goto do_unallocated;
2140 }
2141 modifier = new_tmp_a64_zero(s);
2142 }
2143 if (s->pauth_active) {
2144 dst = new_tmp_a64(s);
2145 if (op3 == 2) {
2146 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2147 } else {
2148 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2149 }
2150 } else {
2151 dst = cpu_reg(s, rn);
2152 }
2153 break;
2154
2155 default:
2156 goto do_unallocated;
2157 }
2158 gen_a64_set_pc(s, dst);
2159 /* BLR also needs to load return address */
2160 if (opc == 1) {
2161 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2162 }
2163 break;
2164
2165 case 8: /* BRAA */
2166 case 9: /* BLRAA */
2167 if (!dc_isar_feature(aa64_pauth, s)) {
2168 goto do_unallocated;
2169 }
2170 if ((op3 & ~1) != 2) {
2171 goto do_unallocated;
2172 }
2173 btype_mod = opc & 1;
2174 if (s->pauth_active) {
2175 dst = new_tmp_a64(s);
2176 modifier = cpu_reg_sp(s, op4);
2177 if (op3 == 2) {
2178 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2179 } else {
2180 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2181 }
2182 } else {
2183 dst = cpu_reg(s, rn);
2184 }
2185 gen_a64_set_pc(s, dst);
2186 /* BLRAA also needs to load return address */
2187 if (opc == 9) {
2188 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2189 }
2190 break;
2191
2192 case 4: /* ERET */
2193 if (s->current_el == 0) {
2194 goto do_unallocated;
2195 }
2196 switch (op3) {
2197 case 0: /* ERET */
2198 if (op4 != 0) {
2199 goto do_unallocated;
2200 }
2201 dst = tcg_temp_new_i64();
2202 tcg_gen_ld_i64(dst, cpu_env,
2203 offsetof(CPUARMState, elr_el[s->current_el]));
2204 break;
2205
2206 case 2: /* ERETAA */
2207 case 3: /* ERETAB */
2208 if (!dc_isar_feature(aa64_pauth, s)) {
2209 goto do_unallocated;
2210 }
2211 if (rn != 0x1f || op4 != 0x1f) {
2212 goto do_unallocated;
2213 }
2214 dst = tcg_temp_new_i64();
2215 tcg_gen_ld_i64(dst, cpu_env,
2216 offsetof(CPUARMState, elr_el[s->current_el]));
2217 if (s->pauth_active) {
2218 modifier = cpu_X[31];
2219 if (op3 == 2) {
2220 gen_helper_autia(dst, cpu_env, dst, modifier);
2221 } else {
2222 gen_helper_autib(dst, cpu_env, dst, modifier);
2223 }
2224 }
2225 break;
2226
2227 default:
2228 goto do_unallocated;
2229 }
2230 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2231 gen_io_start();
2232 }
2233
2234 gen_helper_exception_return(cpu_env, dst);
2235 tcg_temp_free_i64(dst);
2236 /* Must exit loop to check unmasked IRQs */
2237 s->base.is_jmp = DISAS_EXIT;
2238 return;
2239
2240 case 5: /* DRPS */
2241 if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2242 goto do_unallocated;
2243 } else {
2244 unsupported_encoding(s, insn);
2245 }
2246 return;
2247
2248 default:
2249 do_unallocated:
2250 unallocated_encoding(s);
2251 return;
2252 }
2253
2254 switch (btype_mod) {
2255 case 0: /* BR */
2256 if (dc_isar_feature(aa64_bti, s)) {
2257 /* BR to {x16,x17} or !guard -> 1, else 3. */
2258 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2259 }
2260 break;
2261
2262 case 1: /* BLR */
2263 if (dc_isar_feature(aa64_bti, s)) {
2264 /* BLR sets BTYPE to 2, regardless of source guarded page. */
2265 set_btype(s, 2);
2266 }
2267 break;
2268
2269 default: /* RET or none of the above. */
2270 /* BTYPE will be set to 0 by normal end-of-insn processing. */
2271 break;
2272 }
2273
2274 s->base.is_jmp = DISAS_JUMP;
2275 }
2276
2277 /* Branches, exception generating and system instructions */
2278 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2279 {
2280 switch (extract32(insn, 25, 7)) {
2281 case 0x0a: case 0x0b:
2282 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2283 disas_uncond_b_imm(s, insn);
2284 break;
2285 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2286 disas_comp_b_imm(s, insn);
2287 break;
2288 case 0x1b: case 0x5b: /* Test & branch (immediate) */
2289 disas_test_b_imm(s, insn);
2290 break;
2291 case 0x2a: /* Conditional branch (immediate) */
2292 disas_cond_b_imm(s, insn);
2293 break;
2294 case 0x6a: /* Exception generation / System */
2295 if (insn & (1 << 24)) {
2296 if (extract32(insn, 22, 2) == 0) {
2297 disas_system(s, insn);
2298 } else {
2299 unallocated_encoding(s);
2300 }
2301 } else {
2302 disas_exc(s, insn);
2303 }
2304 break;
2305 case 0x6b: /* Unconditional branch (register) */
2306 disas_uncond_b_reg(s, insn);
2307 break;
2308 default:
2309 unallocated_encoding(s);
2310 break;
2311 }
2312 }
2313
2314 /*
2315 * Load/Store exclusive instructions are implemented by remembering
2316 * the value/address loaded, and seeing if these are the same
2317 * when the store is performed. This is not actually the architecturally
2318 * mandated semantics, but it works for typical guest code sequences
2319 * and avoids having to monitor regular stores.
2320 *
2321 * The store exclusive uses the atomic cmpxchg primitives to avoid
2322 * races in multi-threaded linux-user and when MTTCG softmmu is
2323 * enabled.
2324 */
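/*
 * As a rough sketch (pseudocode mirroring the comment above, not the
 * architected monitor semantics), the load side below behaves like:
 *
 *     env->exclusive_addr = addr;
 *     env->exclusive_val  = [addr];        // plus exclusive_high for pairs
 *     {Rt} (and {Rt2})    = loaded data;
 *
 * and gen_store_exclusive() then compares against these saved values.
 */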
2325 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2326 TCGv_i64 addr, int size, bool is_pair)
2327 {
2328 int idx = get_mem_index(s);
2329 MemOp memop = s->be_data;
2330
2331 g_assert(size <= 3);
2332 if (is_pair) {
2333 g_assert(size >= 2);
2334 if (size == 2) {
2335 /* The pair must be single-copy atomic for the doubleword. */
2336 memop |= MO_64 | MO_ALIGN;
2337 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2338 if (s->be_data == MO_LE) {
2339 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2340 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2341 } else {
2342 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2343 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2344 }
2345 } else {
2346 /* The pair must be single-copy atomic for *each* doubleword, not
2347 * the entire quadword; however, it must be quadword aligned. */
2348 memop |= MO_64;
2349 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2350 memop | MO_ALIGN_16);
2351
2352 TCGv_i64 addr2 = tcg_temp_new_i64();
2353 tcg_gen_addi_i64(addr2, addr, 8);
2354 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2355 tcg_temp_free_i64(addr2);
2356
2357 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2358 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2359 }
2360 } else {
2361 memop |= size | MO_ALIGN;
2362 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2363 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2364 }
2365 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2366 }
2367
2368 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2369 TCGv_i64 addr, int size, int is_pair)
2370 {
2371 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2372 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2373 * [addr] = {Rt};
2374 * if (is_pair) {
2375 * [addr + datasize] = {Rt2};
2376 * }
2377 * {Rd} = 0;
2378 * } else {
2379 * {Rd} = 1;
2380 * }
2381 * env->exclusive_addr = -1;
2382 */
2383 TCGLabel *fail_label = gen_new_label();
2384 TCGLabel *done_label = gen_new_label();
2385 TCGv_i64 tmp;
2386
2387 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2388
2389 tmp = tcg_temp_new_i64();
2390 if (is_pair) {
2391 if (size == 2) {
2392 if (s->be_data == MO_LE) {
2393 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2394 } else {
2395 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2396 }
2397 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2398 cpu_exclusive_val, tmp,
2399 get_mem_index(s),
2400 MO_64 | MO_ALIGN | s->be_data);
2401 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2402 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2403 if (!HAVE_CMPXCHG128) {
2404 gen_helper_exit_atomic(cpu_env);
2405 s->base.is_jmp = DISAS_NORETURN;
2406 } else if (s->be_data == MO_LE) {
2407 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2408 cpu_exclusive_addr,
2409 cpu_reg(s, rt),
2410 cpu_reg(s, rt2));
2411 } else {
2412 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2413 cpu_exclusive_addr,
2414 cpu_reg(s, rt),
2415 cpu_reg(s, rt2));
2416 }
2417 } else if (s->be_data == MO_LE) {
2418 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2419 cpu_reg(s, rt), cpu_reg(s, rt2));
2420 } else {
2421 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2422 cpu_reg(s, rt), cpu_reg(s, rt2));
2423 }
2424 } else {
2425 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2426 cpu_reg(s, rt), get_mem_index(s),
2427 size | MO_ALIGN | s->be_data);
2428 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2429 }
2430 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2431 tcg_temp_free_i64(tmp);
2432 tcg_gen_br(done_label);
2433
2434 gen_set_label(fail_label);
2435 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2436 gen_set_label(done_label);
2437 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2438 }
2439
2440 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2441 int rn, int size)
2442 {
2443 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2444 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2445 int memidx = get_mem_index(s);
2446 TCGv_i64 clean_addr;
2447
2448 if (rn == 31) {
2449 gen_check_sp_alignment(s);
2450 }
2451 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2452 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2453 size | MO_ALIGN | s->be_data);
2454 }
2455
2456 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2457 int rn, int size)
2458 {
2459 TCGv_i64 s1 = cpu_reg(s, rs);
2460 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2461 TCGv_i64 t1 = cpu_reg(s, rt);
2462 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2463 TCGv_i64 clean_addr;
2464 int memidx = get_mem_index(s);
2465
2466 if (rn == 31) {
2467 gen_check_sp_alignment(s);
2468 }
2469
2470 /* This is a single atomic access, despite the "pair". */
2471 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2472
2473 if (size == 2) {
2474 TCGv_i64 cmp = tcg_temp_new_i64();
2475 TCGv_i64 val = tcg_temp_new_i64();
2476
2477 if (s->be_data == MO_LE) {
2478 tcg_gen_concat32_i64(val, t1, t2);
2479 tcg_gen_concat32_i64(cmp, s1, s2);
2480 } else {
2481 tcg_gen_concat32_i64(val, t2, t1);
2482 tcg_gen_concat32_i64(cmp, s2, s1);
2483 }
2484
2485 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2486 MO_64 | MO_ALIGN | s->be_data);
2487 tcg_temp_free_i64(val);
2488
2489 if (s->be_data == MO_LE) {
2490 tcg_gen_extr32_i64(s1, s2, cmp);
2491 } else {
2492 tcg_gen_extr32_i64(s2, s1, cmp);
2493 }
2494 tcg_temp_free_i64(cmp);
2495 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2496 if (HAVE_CMPXCHG128) {
2497 TCGv_i32 tcg_rs = tcg_const_i32(rs);
2498 if (s->be_data == MO_LE) {
2499 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2500 clean_addr, t1, t2);
2501 } else {
2502 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2503 clean_addr, t1, t2);
2504 }
2505 tcg_temp_free_i32(tcg_rs);
2506 } else {
2507 gen_helper_exit_atomic(cpu_env);
2508 s->base.is_jmp = DISAS_NORETURN;
2509 }
2510 } else {
2511 TCGv_i64 d1 = tcg_temp_new_i64();
2512 TCGv_i64 d2 = tcg_temp_new_i64();
2513 TCGv_i64 a2 = tcg_temp_new_i64();
2514 TCGv_i64 c1 = tcg_temp_new_i64();
2515 TCGv_i64 c2 = tcg_temp_new_i64();
2516 TCGv_i64 zero = tcg_const_i64(0);
2517
2518 /* Load the two words, in memory order. */
2519 tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2520 MO_64 | MO_ALIGN_16 | s->be_data);
2521 tcg_gen_addi_i64(a2, clean_addr, 8);
2522 tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2523
2524 /* Compare the two words, also in memory order. */
2525 tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2526 tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2527 tcg_gen_and_i64(c2, c2, c1);
2528
2529 /* If compare equal, write back new data, else write back old data. */
2530 tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2531 tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2532 tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2533 tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2534 tcg_temp_free_i64(a2);
2535 tcg_temp_free_i64(c1);
2536 tcg_temp_free_i64(c2);
2537 tcg_temp_free_i64(zero);
2538
2539 /* Write back the data from memory to Rs. */
2540 tcg_gen_mov_i64(s1, d1);
2541 tcg_gen_mov_i64(s2, d2);
2542 tcg_temp_free_i64(d1);
2543 tcg_temp_free_i64(d2);
2544 }
2545 }
2546
2547 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2548 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2549 */
2550 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2551 {
2552 int opc0 = extract32(opc, 0, 1);
2553 int regsize;
2554
2555 if (is_signed) {
2556 regsize = opc0 ? 32 : 64;
2557 } else {
2558 regsize = size == 3 ? 64 : 32;
2559 }
2560 return regsize == 64;
2561 }
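/*
 * Illustrative cases for the helper above (examples added for clarity):
 * LDR Xt (size==3, unsigned) and LDRSW Xt (signed, opc<0>==0) report
 * SF=1, while LDRB Wt (size==0, unsigned) and LDRSH Wt (signed,
 * opc<0>==1) report SF=0.
 */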
2562
2563 /* Load/store exclusive
2564 *
2565 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
2566 * +-----+-------------+----+---+----+------+----+-------+------+------+
2567 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
2568 * +-----+-------------+----+---+----+------+----+-------+------+------+
2569 *
2570 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2571 * L: 0 -> store, 1 -> load
2572 * o2: 0 -> exclusive, 1 -> not
2573 * o1: 0 -> single register, 1 -> register pair
2574 * o0: 1 -> load-acquire/store-release, 0 -> not
2575 */
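/*
 * Example decode (illustrative): LDAXR Xt, [Xn] has o2=0, L=1, o1=0, o0=1,
 * so o2_L_o1_o0 below evaluates to 0x5 and selects the LDAXR case.
 */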
2576 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2577 {
2578 int rt = extract32(insn, 0, 5);
2579 int rn = extract32(insn, 5, 5);
2580 int rt2 = extract32(insn, 10, 5);
2581 int rs = extract32(insn, 16, 5);
2582 int is_lasr = extract32(insn, 15, 1);
2583 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2584 int size = extract32(insn, 30, 2);
2585 TCGv_i64 clean_addr;
2586
2587 switch (o2_L_o1_o0) {
2588 case 0x0: /* STXR */
2589 case 0x1: /* STLXR */
2590 if (rn == 31) {
2591 gen_check_sp_alignment(s);
2592 }
2593 if (is_lasr) {
2594 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2595 }
2596 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2597 true, rn != 31, size);
2598 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2599 return;
2600
2601 case 0x4: /* LDXR */
2602 case 0x5: /* LDAXR */
2603 if (rn == 31) {
2604 gen_check_sp_alignment(s);
2605 }
2606 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2607 false, rn != 31, size);
2608 s->is_ldex = true;
2609 gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2610 if (is_lasr) {
2611 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2612 }
2613 return;
2614
2615 case 0x8: /* STLLR */
2616 if (!dc_isar_feature(aa64_lor, s)) {
2617 break;
2618 }
2619 /* StoreLORelease is the same as Store-Release for QEMU. */
2620 /* fall through */
2621 case 0x9: /* STLR */
2622 /* Generate ISS for non-exclusive accesses including LASR. */
2623 if (rn == 31) {
2624 gen_check_sp_alignment(s);
2625 }
2626 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2627 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2628 true, rn != 31, size);
2629 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2630 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2631 return;
2632
2633 case 0xc: /* LDLAR */
2634 if (!dc_isar_feature(aa64_lor, s)) {
2635 break;
2636 }
2637 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2638 /* fall through */
2639 case 0xd: /* LDAR */
2640 /* Generate ISS for non-exclusive accesses including LASR. */
2641 if (rn == 31) {
2642 gen_check_sp_alignment(s);
2643 }
2644 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2645 false, rn != 31, size);
2646 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2647 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2648 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2649 return;
2650
2651 case 0x2: case 0x3: /* CASP / STXP */
2652 if (size & 2) { /* STXP / STLXP */
2653 if (rn == 31) {
2654 gen_check_sp_alignment(s);
2655 }
2656 if (is_lasr) {
2657 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2658 }
2659 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2660 true, rn != 31, size);
2661 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2662 return;
2663 }
2664 if (rt2 == 31
2665 && ((rt | rs) & 1) == 0
2666 && dc_isar_feature(aa64_atomics, s)) {
2667 /* CASP / CASPL */
2668 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2669 return;
2670 }
2671 break;
2672
2673 case 0x6: case 0x7: /* CASPA / LDXP */
2674 if (size & 2) { /* LDXP / LDAXP */
2675 if (rn == 31) {
2676 gen_check_sp_alignment(s);
2677 }
2678 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2679 false, rn != 31, size);
2680 s->is_ldex = true;
2681 gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2682 if (is_lasr) {
2683 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2684 }
2685 return;
2686 }
2687 if (rt2 == 31
2688 && ((rt | rs) & 1) == 0
2689 && dc_isar_feature(aa64_atomics, s)) {
2690 /* CASPA / CASPAL */
2691 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2692 return;
2693 }
2694 break;
2695
2696 case 0xa: /* CAS */
2697 case 0xb: /* CASL */
2698 case 0xe: /* CASA */
2699 case 0xf: /* CASAL */
2700 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2701 gen_compare_and_swap(s, rs, rt, rn, size);
2702 return;
2703 }
2704 break;
2705 }
2706 unallocated_encoding(s);
2707 }
2708
2709 /*
2710 * Load register (literal)
2711 *
2712 * 31 30 29 27 26 25 24 23 5 4 0
2713 * +-----+-------+---+-----+-------------------+-------+
2714 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2715 * +-----+-------+---+-----+-------------------+-------+
2716 *
2717 * V: 1 -> vector (simd/fp)
2718 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2719 * 10-> 32 bit signed, 11 -> prefetch
2720 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2721 */
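/*
 * Worked example (illustrative): LDR x0, <label> with the label 8 bytes
 * ahead encodes imm19=2; the code below sign-extends and scales it by 4,
 * giving imm=8, and loads from s->pc_curr + 8.
 */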
2722 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2723 {
2724 int rt = extract32(insn, 0, 5);
2725 int64_t imm = sextract32(insn, 5, 19) << 2;
2726 bool is_vector = extract32(insn, 26, 1);
2727 int opc = extract32(insn, 30, 2);
2728 bool is_signed = false;
2729 int size = 2;
2730 TCGv_i64 tcg_rt, clean_addr;
2731
2732 if (is_vector) {
2733 if (opc == 3) {
2734 unallocated_encoding(s);
2735 return;
2736 }
2737 size = 2 + opc;
2738 if (!fp_access_check(s)) {
2739 return;
2740 }
2741 } else {
2742 if (opc == 3) {
2743 /* PRFM (literal) : prefetch */
2744 return;
2745 }
2746 size = 2 + extract32(opc, 0, 1);
2747 is_signed = extract32(opc, 1, 1);
2748 }
2749
2750 tcg_rt = cpu_reg(s, rt);
2751
2752 clean_addr = tcg_const_i64(s->pc_curr + imm);
2753 if (is_vector) {
2754 do_fp_ld(s, rt, clean_addr, size);
2755 } else {
2756 /* Only unsigned 32bit loads target 32bit registers. */
2757 bool iss_sf = opc != 0;
2758
2759 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2760 true, rt, iss_sf, false);
2761 }
2762 tcg_temp_free_i64(clean_addr);
2763 }
2764
2765 /*
2766 * LDNP (Load Pair - non-temporal hint)
2767 * LDP (Load Pair - non vector)
2768 * LDPSW (Load Pair Signed Word - non vector)
2769 * STNP (Store Pair - non-temporal hint)
2770 * STP (Store Pair - non vector)
2771 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2772 * LDP (Load Pair of SIMD&FP)
2773 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2774 * STP (Store Pair of SIMD&FP)
2775 *
2776 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
2777 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2778 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
2779 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2780 *
2781 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2782 * LDPSW/STGP 01
2783 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2784 * V: 0 -> GPR, 1 -> Vector
2785 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2786 * 10 -> signed offset, 11 -> pre-index
2787 * L: 0 -> Store 1 -> Load
2788 *
2789 * Rt, Rt2 = GPR or SIMD registers to be stored
2790 * Rn = general purpose register containing address
2791 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2792 */
2793 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2794 {
2795 int rt = extract32(insn, 0, 5);
2796 int rn = extract32(insn, 5, 5);
2797 int rt2 = extract32(insn, 10, 5);
2798 uint64_t offset = sextract64(insn, 15, 7);
2799 int index = extract32(insn, 23, 2);
2800 bool is_vector = extract32(insn, 26, 1);
2801 bool is_load = extract32(insn, 22, 1);
2802 int opc = extract32(insn, 30, 2);
2803
2804 bool is_signed = false;
2805 bool postindex = false;
2806 bool wback = false;
2807 bool set_tag = false;
2808
2809 TCGv_i64 clean_addr, dirty_addr;
2810
2811 int size;
2812
2813 if (opc == 3) {
2814 unallocated_encoding(s);
2815 return;
2816 }
2817
2818 if (is_vector) {
2819 size = 2 + opc;
2820 } else if (opc == 1 && !is_load) {
2821 /* STGP */
2822 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2823 unallocated_encoding(s);
2824 return;
2825 }
2826 size = 3;
2827 set_tag = true;
2828 } else {
2829 size = 2 + extract32(opc, 1, 1);
2830 is_signed = extract32(opc, 0, 1);
2831 if (!is_load && is_signed) {
2832 unallocated_encoding(s);
2833 return;
2834 }
2835 }
2836
2837 switch (index) {
2838 case 1: /* post-index */
2839 postindex = true;
2840 wback = true;
2841 break;
2842 case 0:
2843 /* Signed offset with "non-temporal" hint. Since we don't emulate
2844 * caches, we don't care about hints to the cache system about
2845 * data access patterns, so we handle this identically to a plain
2846 * signed offset.
2847 */
2848 if (is_signed) {
2849 /* There is no non-temporal-hint version of LDPSW */
2850 unallocated_encoding(s);
2851 return;
2852 }
2853 postindex = false;
2854 break;
2855 case 2: /* signed offset, rn not updated */
2856 postindex = false;
2857 break;
2858 case 3: /* pre-index */
2859 postindex = false;
2860 wback = true;
2861 break;
2862 }
2863
2864 if (is_vector && !fp_access_check(s)) {
2865 return;
2866 }
2867
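/*
 * Scaling example (illustrative, not from the spec text): for
 * LDP x0, x1, [sp, #-16], opc=10 gives size==3 and the encoded imm7 is -2,
 * so the shift below produces a byte offset of -16.
 */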
2868 offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2869
2870 if (rn == 31) {
2871 gen_check_sp_alignment(s);
2872 }
2873
2874 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2875 if (!postindex) {
2876 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2877 }
2878
2879 if (set_tag) {
2880 if (!s->ata) {
2881 /*
2882 * TODO: We could rely on the stores below, at least for
2883 * system mode, if we arrange to add MO_ALIGN_16.
2884 */
2885 gen_helper_stg_stub(cpu_env, dirty_addr);
2886 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2887 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2888 } else {
2889 gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2890 }
2891 }
2892
2893 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2894 (wback || rn != 31) && !set_tag,
2895 size, 2 << size);
2896
2897 if (is_vector) {
2898 if (is_load) {
2899 do_fp_ld(s, rt, clean_addr, size);
2900 } else {
2901 do_fp_st(s, rt, clean_addr, size);
2902 }
2903 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2904 if (is_load) {
2905 do_fp_ld(s, rt2, clean_addr, size);
2906 } else {
2907 do_fp_st(s, rt2, clean_addr, size);
2908 }
2909 } else {
2910 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2911 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2912
2913 if (is_load) {
2914 TCGv_i64 tmp = tcg_temp_new_i64();
2915
2916 /* Do not modify tcg_rt before recognizing any exception
2917 * from the second load.
2918 */
2919 do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2920 false, 0, false, false);
2921 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2922 do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2923 false, 0, false, false);
2924
2925 tcg_gen_mov_i64(tcg_rt, tmp);
2926 tcg_temp_free_i64(tmp);
2927 } else {
2928 do_gpr_st(s, tcg_rt, clean_addr, size,
2929 false, 0, false, false);
2930 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2931 do_gpr_st(s, tcg_rt2, clean_addr, size,
2932 false, 0, false, false);
2933 }
2934 }
2935
2936 if (wback) {
2937 if (postindex) {
2938 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2939 }
2940 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2941 }
2942 }
2943
2944 /*
2945 * Load/store (immediate post-indexed)
2946 * Load/store (immediate pre-indexed)
2947 * Load/store (unscaled immediate)
2948 *
2949 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2950 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2951 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2952 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2953 *
2954 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2955 * 10 -> unprivileged
2956 * V = 0 -> non-vector
2957 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2958 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2959 */
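/*
 * Example (illustrative): LDTR x0, [x1] is the unprivileged form, so
 * idx==2 below and the access uses get_a64_user_mem_index() rather than
 * the normal mmu index.
 */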
2960 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2961 int opc,
2962 int size,
2963 int rt,
2964 bool is_vector)
2965 {
2966 int rn = extract32(insn, 5, 5);
2967 int imm9 = sextract32(insn, 12, 9);
2968 int idx = extract32(insn, 10, 2);
2969 bool is_signed = false;
2970 bool is_store = false;
2971 bool is_extended = false;
2972 bool is_unpriv = (idx == 2);
2973 bool iss_valid = !is_vector;
2974 bool post_index;
2975 bool writeback;
2976 int memidx;
2977
2978 TCGv_i64 clean_addr, dirty_addr;
2979
2980 if (is_vector) {
2981 size |= (opc & 2) << 1;
2982 if (size > 4 || is_unpriv) {
2983 unallocated_encoding(s);
2984 return;
2985 }
2986 is_store = ((opc & 1) == 0);
2987 if (!fp_access_check(s)) {
2988 return;
2989 }
2990 } else {
2991 if (size == 3 && opc == 2) {
2992 /* PRFM - prefetch */
2993 if (idx != 0) {
2994 unallocated_encoding(s);
2995 return;
2996 }
2997 return;
2998 }
2999 if (opc == 3 && size > 1) {
3000 unallocated_encoding(s);
3001 return;
3002 }
3003 is_store = (opc == 0);
3004 is_signed = extract32(opc, 1, 1);
3005 is_extended = (size < 3) && extract32(opc, 0, 1);
3006 }
3007
3008 switch (idx) {
3009 case 0:
3010 case 2:
3011 post_index = false;
3012 writeback = false;
3013 break;
3014 case 1:
3015 post_index = true;
3016 writeback = true;
3017 break;
3018 case 3:
3019 post_index = false;
3020 writeback = true;
3021 break;
3022 default:
3023 g_assert_not_reached();
3024 }
3025
3026 if (rn == 31) {
3027 gen_check_sp_alignment(s);
3028 }
3029
3030 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3031 if (!post_index) {
3032 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3033 }
3034
3035 memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3036 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3037 writeback || rn != 31,
3038 size, is_unpriv, memidx);
3039
3040 if (is_vector) {
3041 if (is_store) {
3042 do_fp_st(s, rt, clean_addr, size);
3043 } else {
3044 do_fp_ld(s, rt, clean_addr, size);
3045 }
3046 } else {
3047 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3048 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3049
3050 if (is_store) {
3051 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3052 iss_valid, rt, iss_sf, false);
3053 } else {
3054 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
3055 is_signed, is_extended, memidx,
3056 iss_valid, rt, iss_sf, false);
3057 }
3058 }
3059
3060 if (writeback) {
3061 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3062 if (post_index) {
3063 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3064 }
3065 tcg_gen_mov_i64(tcg_rn, dirty_addr);
3066 }
3067 }
3068
3069 /*
3070 * Load/store (register offset)
3071 *
3072 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3073 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3074 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
3075 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3076 *
3077 * For non-vector:
3078 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3079 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3080 * For vector:
3081 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3082 * opc<0>: 0 -> store, 1 -> load
3083 * V: 1 -> vector/simd
3084 * opt: extend encoding (see DecodeRegExtend)
3085 * S: if S=1 then scale the offset by the access size (shift Rm left by 'size')
3086 * Rt: register to transfer into/out of
3087 * Rn: address register or SP for base
3088 * Rm: offset register or ZR for offset
3089 */
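/*
 * Example (illustrative): LDR x0, [x1, x2, LSL #3] has opt=011 (LSL/UXTX)
 * and S=1, so ext_and_shift_reg() below shifts Rm left by size (3) before
 * it is added to the base.
 */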
3090 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3091 int opc,
3092 int size,
3093 int rt,
3094 bool is_vector)
3095 {
3096 int rn = extract32(insn, 5, 5);
3097 int shift = extract32(insn, 12, 1);
3098 int rm = extract32(insn, 16, 5);
3099 int opt = extract32(insn, 13, 3);
3100 bool is_signed = false;
3101 bool is_store = false;
3102 bool is_extended = false;
3103
3104 TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3105
3106 if (extract32(opt, 1, 1) == 0) {
3107 unallocated_encoding(s);
3108 return;
3109 }
3110
3111 if (is_vector) {
3112 size |= (opc & 2) << 1;
3113 if (size > 4) {
3114 unallocated_encoding(s);
3115 return;
3116 }
3117 is_store = !extract32(opc, 0, 1);
3118 if (!fp_access_check(s)) {
3119 return;
3120 }
3121 } else {
3122 if (size == 3 && opc == 2) {
3123 /* PRFM - prefetch */
3124 return;
3125 }
3126 if (opc == 3 && size > 1) {
3127 unallocated_encoding(s);
3128 return;
3129 }
3130 is_store = (opc == 0);
3131 is_signed = extract32(opc, 1, 1);
3132 is_extended = (size < 3) && extract32(opc, 0, 1);
3133 }
3134
3135 if (rn == 31) {
3136 gen_check_sp_alignment(s);
3137 }
3138 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3139
3140 tcg_rm = read_cpu_reg(s, rm, 1);
3141 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3142
3143 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3144 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3145
3146 if (is_vector) {
3147 if (is_store) {
3148 do_fp_st(s, rt, clean_addr, size);
3149 } else {
3150 do_fp_ld(s, rt, clean_addr, size);
3151 }
3152 } else {
3153 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3154 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3155 if (is_store) {
3156 do_gpr_st(s, tcg_rt, clean_addr, size,
3157 true, rt, iss_sf, false);
3158 } else {
3159 do_gpr_ld(s, tcg_rt, clean_addr, size,
3160 is_signed, is_extended,
3161 true, rt, iss_sf, false);
3162 }
3163 }
3164 }
3165
3166 /*
3167 * Load/store (unsigned immediate)
3168 *
3169 * 31 30 29 27 26 25 24 23 22 21 10 9 5 4 0
3170 * +----+-------+---+-----+-----+------------+-------+------+
3171 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
3172 * +----+-------+---+-----+-----+------------+-------+------+
3173 *
3174 * For non-vector:
3175 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3176 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3177 * For vector:
3178 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3179 * opc<0>: 0 -> store, 1 -> load
3180 * Rn: base address register (inc SP)
3181 * Rt: target register
3182 */
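/*
 * Scaling note with an example (illustrative): imm12 is an unsigned offset
 * scaled by the access size, so LDR x0, [x1, #16] encodes imm12==2 and the
 * byte offset is imm12 << size == 16.
 */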
3183 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3184 int opc,
3185 int size,
3186 int rt,
3187 bool is_vector)
3188 {
3189 int rn = extract32(insn, 5, 5);
3190 unsigned int imm12 = extract32(insn, 10, 12);
3191 unsigned int offset;
3192
3193 TCGv_i64 clean_addr, dirty_addr;
3194
3195 bool is_store;
3196 bool is_signed = false;
3197 bool is_extended = false;
3198
3199 if (is_vector) {