target/arm: Convert Neon VCVT fixed-point to gvec
1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "translate.h"
28 #include "internals.h"
29 #include "qemu/host-utils.h"
30
31 #include "hw/semihosting/semihost.h"
32 #include "exec/gen-icount.h"
33
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
36 #include "exec/log.h"
37
38 #include "trace-tcg.h"
39 #include "translate-a64.h"
40 #include "qemu/atomic128.h"
41
42 static TCGv_i64 cpu_X[32];
43 static TCGv_i64 cpu_pc;
44
45 /* Load/store exclusive handling */
46 static TCGv_i64 cpu_exclusive_high;
47
48 static const char *regnames[] = {
49 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
50 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
51 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
52 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
53 };
54
55 enum a64_shift_type {
56 A64_SHIFT_TYPE_LSL = 0,
57 A64_SHIFT_TYPE_LSR = 1,
58 A64_SHIFT_TYPE_ASR = 2,
59 A64_SHIFT_TYPE_ROR = 3
60 };
61
62 /* Table based decoder typedefs - used when the relevant bits for decode
63 * are too awkwardly scattered across the instruction (eg SIMD).
64 */
65 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
66
67 typedef struct AArch64DecodeTable {
68 uint32_t pattern;
69 uint32_t mask;
70 AArch64DecodeFn *disas_fn;
71 } AArch64DecodeTable;
72
73 /* initialize TCG globals. */
74 void a64_translate_init(void)
75 {
76 int i;
77
78 cpu_pc = tcg_global_mem_new_i64(cpu_env,
79 offsetof(CPUARMState, pc),
80 "pc");
81 for (i = 0; i < 32; i++) {
82 cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
83 offsetof(CPUARMState, xregs[i]),
84 regnames[i]);
85 }
86
87 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
88 offsetof(CPUARMState, exclusive_high), "exclusive_high");
89 }
90
91 /*
92 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
93 */
94 static int get_a64_user_mem_index(DisasContext *s)
95 {
96 /*
97 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
98 * which is the usual mmu_idx for this cpu state.
99 */
100 ARMMMUIdx useridx = s->mmu_idx;
101
102 if (s->unpriv) {
103 /*
104 * We have pre-computed the condition for AccType_UNPRIV.
105 * Therefore we should never get here with a mmu_idx for
106 * which we do not know the corresponding user mmu_idx.
107 */
108 switch (useridx) {
109 case ARMMMUIdx_E10_1:
110 case ARMMMUIdx_E10_1_PAN:
111 useridx = ARMMMUIdx_E10_0;
112 break;
113 case ARMMMUIdx_E20_2:
114 case ARMMMUIdx_E20_2_PAN:
115 useridx = ARMMMUIdx_E20_0;
116 break;
117 case ARMMMUIdx_SE10_1:
118 case ARMMMUIdx_SE10_1_PAN:
119 useridx = ARMMMUIdx_SE10_0;
120 break;
121 default:
122 g_assert_not_reached();
123 }
124 }
125 return arm_to_core_mmu_idx(useridx);
126 }
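/*
 * For illustration (derived from the switch above, not extra behaviour):
 * the "unprivileged" loads and stores (LDTR/STTR and friends) are the
 * consumers of this function.  At EL1 with s->unpriv set and
 * s->mmu_idx == ARMMMUIdx_E10_1, the switch selects ARMMMUIdx_E10_0,
 * so the access is performed with EL0 permissions.
 */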
127
128 static void reset_btype(DisasContext *s)
129 {
130 if (s->btype != 0) {
131 TCGv_i32 zero = tcg_const_i32(0);
132 tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
133 tcg_temp_free_i32(zero);
134 s->btype = 0;
135 }
136 }
137
138 static void set_btype(DisasContext *s, int val)
139 {
140 TCGv_i32 tcg_val;
141
142 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
143 tcg_debug_assert(val >= 1 && val <= 3);
144
145 tcg_val = tcg_const_i32(val);
146 tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
147 tcg_temp_free_i32(tcg_val);
148 s->btype = -1;
149 }
150
151 void gen_a64_set_pc_im(uint64_t val)
152 {
153 tcg_gen_movi_i64(cpu_pc, val);
154 }
155
156 /*
157 * Handle Top Byte Ignore (TBI) bits.
158 *
159 * If address tagging is enabled via the TCR TBI bits:
160 * + for EL2 and EL3 there is only one TBI bit, and if it is set
161 * then the address is zero-extended, clearing bits [63:56]
162 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
163 * and TBI1 controls addresses with bit 55 == 1.
164 * If the appropriate TBI bit is set for the address then
165 * the address is sign-extended from bit 55 into bits [63:56]
166 *
167 * Here we have concatenated TBI{1,0} into tbi.
168 */
169 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
170 TCGv_i64 src, int tbi)
171 {
172 if (tbi == 0) {
173 /* Load unmodified address */
174 tcg_gen_mov_i64(dst, src);
175 } else if (!regime_has_2_ranges(s->mmu_idx)) {
176 /* Force tag byte to all zero */
177 tcg_gen_extract_i64(dst, src, 0, 56);
178 } else {
179 /* Sign-extend from bit 55. */
180 tcg_gen_sextract_i64(dst, src, 0, 56);
181
182 if (tbi != 3) {
183 TCGv_i64 tcg_zero = tcg_const_i64(0);
184
185 /*
186 * The two TBI bits differ.
187 * If tbi0, then !tbi1: only use the extension if positive.
188 * if !tbi0, then tbi1: only use the extension if negative.
189 */
190 tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
191 dst, dst, tcg_zero, dst, src);
192 tcg_temp_free_i64(tcg_zero);
193 }
194 }
195 }
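/*
 * Worked example for the mixed-TBI case above (illustrative values):
 * with tbi == 1 (TBI0 set, TBI1 clear),
 *   src = 0xaa00_1234_5678_9abc (bit 55 == 0):
 *     sextract gives 0x0000_1234_5678_9abc, which is >= 0,
 *     so the tag-stripped value is kept;
 *   src = 0x5580_0000_0000_1000 (bit 55 == 1):
 *     sextract gives 0xff80_0000_0000_1000, which is < 0,
 *     so the movcond falls back to the unmodified src.
 * With tbi == 3 both address ranges use the sign-extended result
 * unconditionally.
 */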
196
197 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
198 {
199 /*
200 * If address tagging is enabled for instructions via the TCR TBI bits,
201 * then loading an address into the PC will clear out any tag.
202 */
203 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
204 }
205
206 /*
207 * Handle MTE and/or TBI.
208 *
209 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
210 * for the tag to be present in the FAR_ELx register. But for user-only
211 * mode we do not have a TLB with which to implement this, so we must
212 * remove the top byte now.
213 *
214 * Always return a fresh temporary that we can increment independently
215 * of the write-back address.
216 */
217
218 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
219 {
220 TCGv_i64 clean = new_tmp_a64(s);
221 #ifdef CONFIG_USER_ONLY
222 gen_top_byte_ignore(s, clean, addr, s->tbid);
223 #else
224 tcg_gen_mov_i64(clean, addr);
225 #endif
226 return clean;
227 }
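/*
 * Typical usage sketch (mirroring the load/store paths later in this
 * file; "rn" and the MO_Q memop are just illustrative choices):
 *
 *   TCGv_i64 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
 *   tcg_gen_qemu_ld_i64(dest, clean_addr, get_mem_index(s), MO_Q);
 *
 * i.e. the tag-cleaned temporary is what is fed to the memory op,
 * while the original register keeps its tag for any write-back.
 */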
228
229 /* Insert a zero tag into src, with the result at dst. */
230 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
231 {
232 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
233 }
234
235 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
236 MMUAccessType acc, int log2_size)
237 {
238 TCGv_i32 t_acc = tcg_const_i32(acc);
239 TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s));
240 TCGv_i32 t_size = tcg_const_i32(1 << log2_size);
241
242 gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size);
243 tcg_temp_free_i32(t_acc);
244 tcg_temp_free_i32(t_idx);
245 tcg_temp_free_i32(t_size);
246 }
247
248 /*
249 * For MTE, check a single logical or atomic access. This probes a single
250 * address, the exact one specified. The size and alignment of the access
251 * is not relevant to MTE, per se, but watchpoints do require the size,
252 * and we want to recognize those before making any other changes to state.
253 */
254 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
255 bool is_write, bool tag_checked,
256 int log2_size, bool is_unpriv,
257 int core_idx)
258 {
259 if (tag_checked && s->mte_active[is_unpriv]) {
260 TCGv_i32 tcg_desc;
261 TCGv_i64 ret;
262 int desc = 0;
263
264 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
265 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
266 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
267 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
268 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size);
269 tcg_desc = tcg_const_i32(desc);
270
271 ret = new_tmp_a64(s);
272 gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr);
273 tcg_temp_free_i32(tcg_desc);
274
275 return ret;
276 }
277 return clean_data_tbi(s, addr);
278 }
279
280 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
281 bool tag_checked, int log2_size)
282 {
283 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
284 false, get_mem_index(s));
285 }
286
287 /*
288 * For MTE, check multiple logical sequential accesses.
289 */
290 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
291 bool tag_checked, int log2_esize, int total_size)
292 {
293 if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
294 TCGv_i32 tcg_desc;
295 TCGv_i64 ret;
296 int desc = 0;
297
298 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
299 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
300 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
301 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
302 desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize);
303 desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size);
304 tcg_desc = tcg_const_i32(desc);
305
306 ret = new_tmp_a64(s);
307 gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr);
308 tcg_temp_free_i32(tcg_desc);
309
310 return ret;
311 }
312 return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize);
313 }
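/*
 * For illustration: a load/store pair of two X registers passes
 * log2_esize == 3 and total_size == 16, so total_size != (1 << 3)
 * and the multi-granule check above is used; a call with
 * total_size == (1 << log2_esize), i.e. a single element, falls
 * back to gen_mte_check1() just below.
 */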
314
315 typedef struct DisasCompare64 {
316 TCGCond cond;
317 TCGv_i64 value;
318 } DisasCompare64;
319
320 static void a64_test_cc(DisasCompare64 *c64, int cc)
321 {
322 DisasCompare c32;
323
324 arm_test_cc(&c32, cc);
325
326 /* Sign-extend the 32-bit value so that the GE/LT comparisons work
327 * properly. The NE/EQ comparisons are also fine with this choice. */
328 c64->cond = c32.cond;
329 c64->value = tcg_temp_new_i64();
330 tcg_gen_ext_i32_i64(c64->value, c32.value);
331
332 arm_free_cc(&c32);
333 }
334
335 static void a64_free_cc(DisasCompare64 *c64)
336 {
337 tcg_temp_free_i64(c64->value);
338 }
339
340 static void gen_exception_internal(int excp)
341 {
342 TCGv_i32 tcg_excp = tcg_const_i32(excp);
343
344 assert(excp_is_internal(excp));
345 gen_helper_exception_internal(cpu_env, tcg_excp);
346 tcg_temp_free_i32(tcg_excp);
347 }
348
349 static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
350 {
351 gen_a64_set_pc_im(pc);
352 gen_exception_internal(excp);
353 s->base.is_jmp = DISAS_NORETURN;
354 }
355
356 static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
357 uint32_t syndrome, uint32_t target_el)
358 {
359 gen_a64_set_pc_im(pc);
360 gen_exception(excp, syndrome, target_el);
361 s->base.is_jmp = DISAS_NORETURN;
362 }
363
364 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
365 {
366 TCGv_i32 tcg_syn;
367
368 gen_a64_set_pc_im(s->pc_curr);
369 tcg_syn = tcg_const_i32(syndrome);
370 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
371 tcg_temp_free_i32(tcg_syn);
372 s->base.is_jmp = DISAS_NORETURN;
373 }
374
375 static void gen_step_complete_exception(DisasContext *s)
376 {
377 /* We just completed a step of an insn. Move from Active-not-pending
378 * to Active-pending, and then also take the swstep exception.
379 * This corresponds to making the (IMPDEF) choice to prioritize
380 * swstep exceptions over asynchronous exceptions taken to an exception
381 * level where debug is disabled. This choice has the advantage that
382 * we do not need to maintain internal state corresponding to the
383 * ISV/EX syndrome bits between completion of the step and generation
384 * of the exception, and our syndrome information is always correct.
385 */
386 gen_ss_advance(s);
387 gen_swstep_exception(s, 1, s->is_ldex);
388 s->base.is_jmp = DISAS_NORETURN;
389 }
390
391 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
392 {
393 /* No direct tb linking with singlestep (either QEMU's or the ARM
394 * debug architecture kind) or deterministic io
395 */
396 if (s->base.singlestep_enabled || s->ss_active ||
397 (tb_cflags(s->base.tb) & CF_LAST_IO)) {
398 return false;
399 }
400
401 #ifndef CONFIG_USER_ONLY
402 /* Only link tbs from inside the same guest page */
403 if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
404 return false;
405 }
406 #endif
407
408 return true;
409 }
410
411 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
412 {
413 TranslationBlock *tb;
414
415 tb = s->base.tb;
416 if (use_goto_tb(s, n, dest)) {
417 tcg_gen_goto_tb(n);
418 gen_a64_set_pc_im(dest);
419 tcg_gen_exit_tb(tb, n);
420 s->base.is_jmp = DISAS_NORETURN;
421 } else {
422 gen_a64_set_pc_im(dest);
423 if (s->ss_active) {
424 gen_step_complete_exception(s);
425 } else if (s->base.singlestep_enabled) {
426 gen_exception_internal(EXCP_DEBUG);
427 } else {
428 tcg_gen_lookup_and_goto_ptr();
429 s->base.is_jmp = DISAS_NORETURN;
430 }
431 }
432 }
433
434 void unallocated_encoding(DisasContext *s)
435 {
436 /* Unallocated and reserved encodings are uncategorized */
437 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
438 default_exception_el(s));
439 }
440
441 static void init_tmp_a64_array(DisasContext *s)
442 {
443 #ifdef CONFIG_DEBUG_TCG
444 memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
445 #endif
446 s->tmp_a64_count = 0;
447 }
448
449 static void free_tmp_a64(DisasContext *s)
450 {
451 int i;
452 for (i = 0; i < s->tmp_a64_count; i++) {
453 tcg_temp_free_i64(s->tmp_a64[i]);
454 }
455 init_tmp_a64_array(s);
456 }
457
458 TCGv_i64 new_tmp_a64(DisasContext *s)
459 {
460 assert(s->tmp_a64_count < TMP_A64_MAX);
461 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
462 }
463
464 TCGv_i64 new_tmp_a64_local(DisasContext *s)
465 {
466 assert(s->tmp_a64_count < TMP_A64_MAX);
467 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64();
468 }
469
470 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
471 {
472 TCGv_i64 t = new_tmp_a64(s);
473 tcg_gen_movi_i64(t, 0);
474 return t;
475 }
476
477 /*
478 * Register access functions
479 *
480 * These functions are used for directly accessing a register where
481 * changes to the final register value are likely to be made. If you
482 * need to use a register for temporary calculation (e.g. index type
483 * operations) use the read_* form.
484 *
485 * B1.2.1 Register mappings
486 *
487 * In instruction register encoding 31 can refer to ZR (zero register) or
488 * the SP (stack pointer) depending on context. In QEMU's case we map SP
489 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
490 * This is the point of the _sp forms.
491 */
492 TCGv_i64 cpu_reg(DisasContext *s, int reg)
493 {
494 if (reg == 31) {
495 return new_tmp_a64_zero(s);
496 } else {
497 return cpu_X[reg];
498 }
499 }
500
501 /* register access for when 31 == SP */
502 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
503 {
504 return cpu_X[reg];
505 }
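/*
 * For illustration of the mapping described above: in ORR Xd, XZR, Xm
 * an Rn field of 31 means XZR, so the decoder uses cpu_reg() and gets
 * a discardable zero temporary; in ADD Xd, SP, #imm an Rn field of 31
 * means SP, so the decoder uses cpu_reg_sp() and reads cpu_X[31]
 * directly.
 */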
506
507 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
508 * representing the register contents. This TCGv is an auto-freed
509 * temporary so it need not be explicitly freed, and may be modified.
510 */
511 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
512 {
513 TCGv_i64 v = new_tmp_a64(s);
514 if (reg != 31) {
515 if (sf) {
516 tcg_gen_mov_i64(v, cpu_X[reg]);
517 } else {
518 tcg_gen_ext32u_i64(v, cpu_X[reg]);
519 }
520 } else {
521 tcg_gen_movi_i64(v, 0);
522 }
523 return v;
524 }
525
526 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
527 {
528 TCGv_i64 v = new_tmp_a64(s);
529 if (sf) {
530 tcg_gen_mov_i64(v, cpu_X[reg]);
531 } else {
532 tcg_gen_ext32u_i64(v, cpu_X[reg]);
533 }
534 return v;
535 }
536
537 /* Return the offset into CPUARMState of a slice (from
538 * the least significant end) of FP register Qn (ie
539 * Dn, Sn, Hn or Bn).
540 * (Note that this is not the same mapping as for A32; see cpu.h)
541 */
542 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
543 {
544 return vec_reg_offset(s, regno, 0, size);
545 }
546
547 /* Offset of the high half of the 128 bit vector Qn */
548 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
549 {
550 return vec_reg_offset(s, regno, 1, MO_64);
551 }
552
553 /* Convenience accessors for reading and writing single and double
554 * FP registers. Writing clears the upper parts of the associated
555 * 128 bit vector register, as required by the architecture.
556 * Note that unlike the GP register accessors, the values returned
557 * by the read functions must be manually freed.
558 */
559 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
560 {
561 TCGv_i64 v = tcg_temp_new_i64();
562
563 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
564 return v;
565 }
566
567 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
568 {
569 TCGv_i32 v = tcg_temp_new_i32();
570
571 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
572 return v;
573 }
574
575 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
576 {
577 TCGv_i32 v = tcg_temp_new_i32();
578
579 tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
580 return v;
581 }
582
583 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
584 * If SVE is not enabled, then there are only 128 bits in the vector.
585 */
586 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
587 {
588 unsigned ofs = fp_reg_offset(s, rd, MO_64);
589 unsigned vsz = vec_full_reg_size(s);
590
591 /* Nop move, with side effect of clearing the tail. */
592 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
593 }
594
595 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
596 {
597 unsigned ofs = fp_reg_offset(s, reg, MO_64);
598
599 tcg_gen_st_i64(v, cpu_env, ofs);
600 clear_vec_high(s, false, reg);
601 }
602
603 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
604 {
605 TCGv_i64 tmp = tcg_temp_new_i64();
606
607 tcg_gen_extu_i32_i64(tmp, v);
608 write_fp_dreg(s, reg, tmp);
609 tcg_temp_free_i64(tmp);
610 }
611
612 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
613 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
614 GVecGen2Fn *gvec_fn, int vece)
615 {
616 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
617 is_q ? 16 : 8, vec_full_reg_size(s));
618 }
619
620 /* Expand a 2-operand + immediate AdvSIMD vector operation using
621 * an expander function.
622 */
623 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
624 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
625 {
626 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
627 imm, is_q ? 16 : 8, vec_full_reg_size(s));
628 }
629
630 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
631 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
632 GVecGen3Fn *gvec_fn, int vece)
633 {
634 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
635 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
636 }
637
638 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
639 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
640 int rx, GVecGen4Fn *gvec_fn, int vece)
641 {
642 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
643 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
644 is_q ? 16 : 8, vec_full_reg_size(s));
645 }
646
647 /* Expand a 2-operand operation using an out-of-line helper. */
648 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
649 int rn, int data, gen_helper_gvec_2 *fn)
650 {
651 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
652 vec_full_reg_offset(s, rn),
653 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
654 }
655
656 /* Expand a 3-operand operation using an out-of-line helper. */
657 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
658 int rn, int rm, int data, gen_helper_gvec_3 *fn)
659 {
660 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
661 vec_full_reg_offset(s, rn),
662 vec_full_reg_offset(s, rm),
663 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
664 }
665
666 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
667 * an out-of-line helper.
668 */
669 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
670 int rm, bool is_fp16, int data,
671 gen_helper_gvec_3_ptr *fn)
672 {
673 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
674 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
675 vec_full_reg_offset(s, rn),
676 vec_full_reg_offset(s, rm), fpst,
677 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
678 tcg_temp_free_ptr(fpst);
679 }
680
681 /* Expand a 3-operand + qc + operation using an out-of-line helper. */
682 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
683 int rm, gen_helper_gvec_3_ptr *fn)
684 {
685 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
686
687 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
688 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
689 vec_full_reg_offset(s, rn),
690 vec_full_reg_offset(s, rm), qc_ptr,
691 is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
692 tcg_temp_free_ptr(qc_ptr);
693 }
694
695 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
696 * than the 32 bit equivalent.
697 */
698 static inline void gen_set_NZ64(TCGv_i64 result)
699 {
700 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
701 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
702 }
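/*
 * Recall the flag representation used here: cpu_ZF is zero iff Z is
 * set, and cpu_NF carries N in bit 31.  E.g. for result ==
 * 0xffffffff_00000000 the code above yields cpu_NF = 0xffffffff
 * (bit 31 set, so N = 1) and cpu_ZF = 0x00000000 | 0xffffffff != 0
 * (so Z = 0); only result == 0 leaves cpu_ZF zero.
 */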
703
704 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
705 static inline void gen_logic_CC(int sf, TCGv_i64 result)
706 {
707 if (sf) {
708 gen_set_NZ64(result);
709 } else {
710 tcg_gen_extrl_i64_i32(cpu_ZF, result);
711 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
712 }
713 tcg_gen_movi_i32(cpu_CF, 0);
714 tcg_gen_movi_i32(cpu_VF, 0);
715 }
716
717 /* dest = T0 + T1; compute C, N, V and Z flags */
718 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
719 {
720 if (sf) {
721 TCGv_i64 result, flag, tmp;
722 result = tcg_temp_new_i64();
723 flag = tcg_temp_new_i64();
724 tmp = tcg_temp_new_i64();
725
726 tcg_gen_movi_i64(tmp, 0);
727 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
728
729 tcg_gen_extrl_i64_i32(cpu_CF, flag);
730
731 gen_set_NZ64(result);
732
733 tcg_gen_xor_i64(flag, result, t0);
734 tcg_gen_xor_i64(tmp, t0, t1);
735 tcg_gen_andc_i64(flag, flag, tmp);
736 tcg_temp_free_i64(tmp);
737 tcg_gen_extrh_i64_i32(cpu_VF, flag);
738
739 tcg_gen_mov_i64(dest, result);
740 tcg_temp_free_i64(result);
741 tcg_temp_free_i64(flag);
742 } else {
743 /* 32 bit arithmetic */
744 TCGv_i32 t0_32 = tcg_temp_new_i32();
745 TCGv_i32 t1_32 = tcg_temp_new_i32();
746 TCGv_i32 tmp = tcg_temp_new_i32();
747
748 tcg_gen_movi_i32(tmp, 0);
749 tcg_gen_extrl_i64_i32(t0_32, t0);
750 tcg_gen_extrl_i64_i32(t1_32, t1);
751 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
752 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
753 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
754 tcg_gen_xor_i32(tmp, t0_32, t1_32);
755 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
756 tcg_gen_extu_i32_i64(dest, cpu_NF);
757
758 tcg_temp_free_i32(tmp);
759 tcg_temp_free_i32(t0_32);
760 tcg_temp_free_i32(t1_32);
761 }
762 }
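/*
 * The V computation above is the usual "operand signs agree but differ
 * from the sign of the result" test:
 *   V = (result ^ t0) & ~(t0 ^ t1), taken from the top bit.
 * Worked 32-bit example: t0 = t1 = 0x4000_0000 gives result =
 * 0x8000_0000; (result ^ t0) = 0xc000_0000 and (t0 ^ t1) = 0, so the
 * andc leaves bit 31 set and V is reported as 1, as expected for
 * positive + positive overflowing to negative.
 */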
763
764 /* dest = T0 - T1; compute C, N, V and Z flags */
765 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
766 {
767 if (sf) {
768 /* 64 bit arithmetic */
769 TCGv_i64 result, flag, tmp;
770
771 result = tcg_temp_new_i64();
772 flag = tcg_temp_new_i64();
773 tcg_gen_sub_i64(result, t0, t1);
774
775 gen_set_NZ64(result);
776
777 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
778 tcg_gen_extrl_i64_i32(cpu_CF, flag);
779
780 tcg_gen_xor_i64(flag, result, t0);
781 tmp = tcg_temp_new_i64();
782 tcg_gen_xor_i64(tmp, t0, t1);
783 tcg_gen_and_i64(flag, flag, tmp);
784 tcg_temp_free_i64(tmp);
785 tcg_gen_extrh_i64_i32(cpu_VF, flag);
786 tcg_gen_mov_i64(dest, result);
787 tcg_temp_free_i64(flag);
788 tcg_temp_free_i64(result);
789 } else {
790 /* 32 bit arithmetic */
791 TCGv_i32 t0_32 = tcg_temp_new_i32();
792 TCGv_i32 t1_32 = tcg_temp_new_i32();
793 TCGv_i32 tmp;
794
795 tcg_gen_extrl_i64_i32(t0_32, t0);
796 tcg_gen_extrl_i64_i32(t1_32, t1);
797 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
798 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
799 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
800 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
801 tmp = tcg_temp_new_i32();
802 tcg_gen_xor_i32(tmp, t0_32, t1_32);
803 tcg_temp_free_i32(t0_32);
804 tcg_temp_free_i32(t1_32);
805 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
806 tcg_temp_free_i32(tmp);
807 tcg_gen_extu_i32_i64(dest, cpu_NF);
808 }
809 }
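/*
 * For subtraction the tests flip: C is "no borrow", i.e. t0 >= t1
 * unsigned (the setcond above), and
 *   V = (result ^ t0) & (t0 ^ t1)
 * has its top bit set exactly when the operands have different signs
 * and the result's sign differs from t0.  E.g. in 32-bit mode
 * t0 = 0x8000_0000, t1 = 1 gives result = 0x7fff_ffff, so V = 1 and
 * C = 1 (no borrow).
 */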
810
811 /* dest = T0 + T1 + CF; do not compute flags. */
812 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
813 {
814 TCGv_i64 flag = tcg_temp_new_i64();
815 tcg_gen_extu_i32_i64(flag, cpu_CF);
816 tcg_gen_add_i64(dest, t0, t1);
817 tcg_gen_add_i64(dest, dest, flag);
818 tcg_temp_free_i64(flag);
819
820 if (!sf) {
821 tcg_gen_ext32u_i64(dest, dest);
822 }
823 }
824
825 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
826 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
827 {
828 if (sf) {
829 TCGv_i64 result, cf_64, vf_64, tmp;
830 result = tcg_temp_new_i64();
831 cf_64 = tcg_temp_new_i64();
832 vf_64 = tcg_temp_new_i64();
833 tmp = tcg_const_i64(0);
834
835 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
836 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
837 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
838 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
839 gen_set_NZ64(result);
840
841 tcg_gen_xor_i64(vf_64, result, t0);
842 tcg_gen_xor_i64(tmp, t0, t1);
843 tcg_gen_andc_i64(vf_64, vf_64, tmp);
844 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
845
846 tcg_gen_mov_i64(dest, result);
847
848 tcg_temp_free_i64(tmp);
849 tcg_temp_free_i64(vf_64);
850 tcg_temp_free_i64(cf_64);
851 tcg_temp_free_i64(result);
852 } else {
853 TCGv_i32 t0_32, t1_32, tmp;
854 t0_32 = tcg_temp_new_i32();
855 t1_32 = tcg_temp_new_i32();
856 tmp = tcg_const_i32(0);
857
858 tcg_gen_extrl_i64_i32(t0_32, t0);
859 tcg_gen_extrl_i64_i32(t1_32, t1);
860 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
861 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
862
863 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
864 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
865 tcg_gen_xor_i32(tmp, t0_32, t1_32);
866 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
867 tcg_gen_extu_i32_i64(dest, cpu_NF);
868
869 tcg_temp_free_i32(tmp);
870 tcg_temp_free_i32(t1_32);
871 tcg_temp_free_i32(t0_32);
872 }
873 }
874
875 /*
876 * Load/Store generators
877 */
878
879 /*
880 * Store from GPR register to memory.
881 */
882 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
883 TCGv_i64 tcg_addr, int size, int memidx,
884 bool iss_valid,
885 unsigned int iss_srt,
886 bool iss_sf, bool iss_ar)
887 {
888 g_assert(size <= 3);
889 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
890
891 if (iss_valid) {
892 uint32_t syn;
893
894 syn = syn_data_abort_with_iss(0,
895 size,
896 false,
897 iss_srt,
898 iss_sf,
899 iss_ar,
900 0, 0, 0, 0, 0, false);
901 disas_set_insn_syndrome(s, syn);
902 }
903 }
904
905 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
906 TCGv_i64 tcg_addr, int size,
907 bool iss_valid,
908 unsigned int iss_srt,
909 bool iss_sf, bool iss_ar)
910 {
911 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
912 iss_valid, iss_srt, iss_sf, iss_ar);
913 }
914
915 /*
916 * Load from memory to GPR register
917 */
918 static void do_gpr_ld_memidx(DisasContext *s,
919 TCGv_i64 dest, TCGv_i64 tcg_addr,
920 int size, bool is_signed,
921 bool extend, int memidx,
922 bool iss_valid, unsigned int iss_srt,
923 bool iss_sf, bool iss_ar)
924 {
925 MemOp memop = s->be_data + size;
926
927 g_assert(size <= 3);
928
929 if (is_signed) {
930 memop += MO_SIGN;
931 }
932
933 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
934
935 if (extend && is_signed) {
936 g_assert(size < 3);
937 tcg_gen_ext32u_i64(dest, dest);
938 }
939
940 if (iss_valid) {
941 uint32_t syn;
942
943 syn = syn_data_abort_with_iss(0,
944 size,
945 is_signed,
946 iss_srt,
947 iss_sf,
948 iss_ar,
949 0, 0, 0, 0, 0, false);
950 disas_set_insn_syndrome(s, syn);
951 }
952 }
953
954 static void do_gpr_ld(DisasContext *s,
955 TCGv_i64 dest, TCGv_i64 tcg_addr,
956 int size, bool is_signed, bool extend,
957 bool iss_valid, unsigned int iss_srt,
958 bool iss_sf, bool iss_ar)
959 {
960 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
961 get_mem_index(s),
962 iss_valid, iss_srt, iss_sf, iss_ar);
963 }
964
965 /*
966 * Store from FP register to memory
967 */
968 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
969 {
970 /* This writes the bottom N bits of a 128 bit wide vector to memory */
971 TCGv_i64 tmp = tcg_temp_new_i64();
972 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
973 if (size < 4) {
974 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
975 s->be_data + size);
976 } else {
977 bool be = s->be_data == MO_BE;
978 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
979
980 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
981 tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
982 s->be_data | MO_Q);
983 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
984 tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
985 s->be_data | MO_Q);
986 tcg_temp_free_i64(tcg_hiaddr);
987 }
988
989 tcg_temp_free_i64(tmp);
990 }
991
992 /*
993 * Load from memory to FP register
994 */
995 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
996 {
997 /* This always zero-extends and writes to a full 128 bit wide vector */
998 TCGv_i64 tmplo = tcg_temp_new_i64();
999 TCGv_i64 tmphi = NULL;
1000
1001 if (size < 4) {
1002 MemOp memop = s->be_data + size;
1003 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
1004 } else {
1005 bool be = s->be_data == MO_BE;
1006 TCGv_i64 tcg_hiaddr;
1007
1008 tmphi = tcg_temp_new_i64();
1009 tcg_hiaddr = tcg_temp_new_i64();
1010
1011 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1012 tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1013 s->be_data | MO_Q);
1014 tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1015 s->be_data | MO_Q);
1016 tcg_temp_free_i64(tcg_hiaddr);
1017 }
1018
1019 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1020 tcg_temp_free_i64(tmplo);
1021
1022 if (tmphi) {
1023 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1024 tcg_temp_free_i64(tmphi);
1025 }
1026 clear_vec_high(s, tmphi != NULL, destidx);
1027 }
1028
1029 /*
1030 * Vector load/store helpers.
1031 *
1032 * The principal difference between this and a FP load is that we don't
1033 * zero extend as we are filling a partial chunk of the vector register.
1034 * These functions don't support 128 bit loads/stores, which would be
1035 * normal load/store operations.
1036 *
1037 * The _i32 versions are useful when operating on 32 bit quantities
1038 * (eg for floating point single or using Neon helper functions).
1039 */
1040
1041 /* Get value of an element within a vector register */
1042 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1043 int element, MemOp memop)
1044 {
1045 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1046 switch (memop) {
1047 case MO_8:
1048 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1049 break;
1050 case MO_16:
1051 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1052 break;
1053 case MO_32:
1054 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1055 break;
1056 case MO_8|MO_SIGN:
1057 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1058 break;
1059 case MO_16|MO_SIGN:
1060 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1061 break;
1062 case MO_32|MO_SIGN:
1063 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1064 break;
1065 case MO_64:
1066 case MO_64|MO_SIGN:
1067 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1068 break;
1069 default:
1070 g_assert_not_reached();
1071 }
1072 }
1073
1074 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1075 int element, MemOp memop)
1076 {
1077 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1078 switch (memop) {
1079 case MO_8:
1080 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1081 break;
1082 case MO_16:
1083 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1084 break;
1085 case MO_8|MO_SIGN:
1086 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1087 break;
1088 case MO_16|MO_SIGN:
1089 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1090 break;
1091 case MO_32:
1092 case MO_32|MO_SIGN:
1093 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1094 break;
1095 default:
1096 g_assert_not_reached();
1097 }
1098 }
1099
1100 /* Set value of an element within a vector register */
1101 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1102 int element, MemOp memop)
1103 {
1104 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1105 switch (memop) {
1106 case MO_8:
1107 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1108 break;
1109 case MO_16:
1110 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1111 break;
1112 case MO_32:
1113 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1114 break;
1115 case MO_64:
1116 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1117 break;
1118 default:
1119 g_assert_not_reached();
1120 }
1121 }
1122
1123 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1124 int destidx, int element, MemOp memop)
1125 {
1126 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1127 switch (memop) {
1128 case MO_8:
1129 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1130 break;
1131 case MO_16:
1132 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1133 break;
1134 case MO_32:
1135 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1136 break;
1137 default:
1138 g_assert_not_reached();
1139 }
1140 }
1141
1142 /* Store from vector register to memory */
1143 static void do_vec_st(DisasContext *s, int srcidx, int element,
1144 TCGv_i64 tcg_addr, int size, MemOp endian)
1145 {
1146 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1147
1148 read_vec_element(s, tcg_tmp, srcidx, element, size);
1149 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1150
1151 tcg_temp_free_i64(tcg_tmp);
1152 }
1153
1154 /* Load from memory to vector register */
1155 static void do_vec_ld(DisasContext *s, int destidx, int element,
1156 TCGv_i64 tcg_addr, int size, MemOp endian)
1157 {
1158 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1159
1160 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1161 write_vec_element(s, tcg_tmp, destidx, element, size);
1162
1163 tcg_temp_free_i64(tcg_tmp);
1164 }
1165
1166 /* Check that FP/Neon access is enabled. If it is, return
1167 * true. If not, emit code to generate an appropriate exception,
1168 * and return false; the caller should not emit any code for
1169 * the instruction. Note that this check must happen after all
1170 * unallocated-encoding checks (otherwise the syndrome information
1171 * for the resulting exception will be incorrect).
1172 */
1173 static bool fp_access_check(DisasContext *s)
1174 {
1175 if (s->fp_excp_el) {
1176 assert(!s->fp_access_checked);
1177 s->fp_access_checked = true;
1178
1179 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1180 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1181 return false;
1182 }
1183 s->fp_access_checked = true;
1184 return true;
1185 }
1186
1187 /* Check that SVE access is enabled. If it is, return true.
1188 * If not, emit code to generate an appropriate exception and return false.
1189 */
1190 bool sve_access_check(DisasContext *s)
1191 {
1192 if (s->sve_excp_el) {
1193 assert(!s->sve_access_checked);
1194 s->sve_access_checked = true;
1195
1196 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1197 syn_sve_access_trap(), s->sve_excp_el);
1198 return false;
1199 }
1200 s->sve_access_checked = true;
1201 return fp_access_check(s);
1202 }
1203
1204 /*
1205 * This utility function is for doing register extension with an
1206 * optional shift. You will likely want to pass a temporary for the
1207 * destination register. See DecodeRegExtend() in the ARM ARM.
1208 */
1209 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1210 int option, unsigned int shift)
1211 {
1212 int extsize = extract32(option, 0, 2);
1213 bool is_signed = extract32(option, 2, 1);
1214
1215 if (is_signed) {
1216 switch (extsize) {
1217 case 0:
1218 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1219 break;
1220 case 1:
1221 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1222 break;
1223 case 2:
1224 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1225 break;
1226 case 3:
1227 tcg_gen_mov_i64(tcg_out, tcg_in);
1228 break;
1229 }
1230 } else {
1231 switch (extsize) {
1232 case 0:
1233 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1234 break;
1235 case 1:
1236 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1237 break;
1238 case 2:
1239 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1240 break;
1241 case 3:
1242 tcg_gen_mov_i64(tcg_out, tcg_in);
1243 break;
1244 }
1245 }
1246
1247 if (shift) {
1248 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1249 }
1250 }
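/*
 * Example of the option encoding (as in DecodeRegExtend): option
 * values 0..7 select UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX.
 * So for "ADD X0, X1, W2, UXTW #2" the decoder passes option = 2 and
 * shift = 2, and this function zero-extends the low 32 bits of X2 and
 * then shifts left by 2.
 */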
1251
1252 static inline void gen_check_sp_alignment(DisasContext *s)
1253 {
1254 /* The AArch64 architecture mandates that (if enabled via PSTATE
1255 * or SCTLR bits) there is a check that SP is 16-aligned on every
1256 * SP-relative load or store (with an exception generated if it is not).
1257 * In line with general QEMU practice regarding misaligned accesses,
1258 * we omit these checks for the sake of guest program performance.
1259 * This function is provided as a hook so we can more easily add these
1260 * checks in future (possibly as a "favour catching guest program bugs
1261 * over speed" user selectable option).
1262 */
1263 }
1264
1265 /*
1266 * This provides a simple table-based lookup decoder. It is
1267 * intended to be used when the relevant bits for decode are too
1268 * awkwardly placed and switch/if based logic would be confusing and
1269 * deeply nested. Since it's a linear search through the table, tables
1270 * should be kept small.
1271 *
1272 * It returns the first handler where insn & mask == pattern, or
1273 * NULL if there is no match.
1274 * The table is terminated by an empty mask (i.e. 0)
1275 */
1276 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1277 uint32_t insn)
1278 {
1279 const AArch64DecodeTable *tptr = table;
1280
1281 while (tptr->mask) {
1282 if ((insn & tptr->mask) == tptr->pattern) {
1283 return tptr->disas_fn;
1284 }
1285 tptr++;
1286 }
1287 return NULL;
1288 }
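/*
 * Usage sketch (the table and handler name here are hypothetical; the
 * real decode tables appear further down in this file and follow the
 * same shape):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_example_group },
 *       { 0x00000000, 0x00000000, NULL }   <- terminator entry, mask == 0
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */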
1289
1290 /*
1291 * The instruction disassembly implemented here matches
1292 * the instruction encoding classifications in chapter C4
1293 * of the ARM Architecture Reference Manual (DDI0487B_a);
1294 * classification names and decode diagrams here should generally
1295 * match up with those in the manual.
1296 */
1297
1298 /* Unconditional branch (immediate)
1299 * 31 30 26 25 0
1300 * +----+-----------+-------------------------------------+
1301 * | op | 0 0 1 0 1 | imm26 |
1302 * +----+-----------+-------------------------------------+
1303 */
1304 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1305 {
1306 uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1307
1308 if (insn & (1U << 31)) {
1309 /* BL Branch with link */
1310 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1311 }
1312
1313 /* B Branch / BL Branch with link */
1314 reset_btype(s);
1315 gen_goto_tb(s, 0, addr);
1316 }
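/*
 * The offset arithmetic above: imm26 is in words, so the branch range
 * is +/- 128MB from the instruction itself (s->pc_curr, not pc_next).
 * E.g. imm26 == 0x3ffffff sign-extends to -1 and branches to
 * pc_curr - 4, i.e. the previous instruction.
 */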
1317
1318 /* Compare and branch (immediate)
1319 * 31 30 25 24 23 5 4 0
1320 * +----+-------------+----+---------------------+--------+
1321 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1322 * +----+-------------+----+---------------------+--------+
1323 */
1324 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1325 {
1326 unsigned int sf, op, rt;
1327 uint64_t addr;
1328 TCGLabel *label_match;
1329 TCGv_i64 tcg_cmp;
1330
1331 sf = extract32(insn, 31, 1);
1332 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1333 rt = extract32(insn, 0, 5);
1334 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1335
1336 tcg_cmp = read_cpu_reg(s, rt, sf);
1337 label_match = gen_new_label();
1338
1339 reset_btype(s);
1340 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1341 tcg_cmp, 0, label_match);
1342
1343 gen_goto_tb(s, 0, s->base.pc_next);
1344 gen_set_label(label_match);
1345 gen_goto_tb(s, 1, addr);
1346 }
1347
1348 /* Test and branch (immediate)
1349 * 31 30 25 24 23 19 18 5 4 0
1350 * +----+-------------+----+-------+-------------+------+
1351 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1352 * +----+-------------+----+-------+-------------+------+
1353 */
1354 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1355 {
1356 unsigned int bit_pos, op, rt;
1357 uint64_t addr;
1358 TCGLabel *label_match;
1359 TCGv_i64 tcg_cmp;
1360
1361 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1362 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1363 addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1364 rt = extract32(insn, 0, 5);
1365
1366 tcg_cmp = tcg_temp_new_i64();
1367 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1368 label_match = gen_new_label();
1369
1370 reset_btype(s);
1371 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1372 tcg_cmp, 0, label_match);
1373 tcg_temp_free_i64(tcg_cmp);
1374 gen_goto_tb(s, 0, s->base.pc_next);
1375 gen_set_label(label_match);
1376 gen_goto_tb(s, 1, addr);
1377 }
1378
1379 /* Conditional branch (immediate)
1380 * 31 25 24 23 5 4 3 0
1381 * +---------------+----+---------------------+----+------+
1382 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1383 * +---------------+----+---------------------+----+------+
1384 */
1385 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1386 {
1387 unsigned int cond;
1388 uint64_t addr;
1389
1390 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1391 unallocated_encoding(s);
1392 return;
1393 }
1394 addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1395 cond = extract32(insn, 0, 4);
1396
1397 reset_btype(s);
1398 if (cond < 0x0e) {
1399 /* genuinely conditional branches */
1400 TCGLabel *label_match = gen_new_label();
1401 arm_gen_test_cc(cond, label_match);
1402 gen_goto_tb(s, 0, s->base.pc_next);
1403 gen_set_label(label_match);
1404 gen_goto_tb(s, 1, addr);
1405 } else {
1406 /* 0xe and 0xf are both "always" conditions */
1407 gen_goto_tb(s, 0, addr);
1408 }
1409 }
1410
1411 /* HINT instruction group, including various allocated HINTs */
1412 static void handle_hint(DisasContext *s, uint32_t insn,
1413 unsigned int op1, unsigned int op2, unsigned int crm)
1414 {
1415 unsigned int selector = crm << 3 | op2;
1416
1417 if (op1 != 3) {
1418 unallocated_encoding(s);
1419 return;
1420 }
1421
1422 switch (selector) {
1423 case 0b00000: /* NOP */
1424 break;
1425 case 0b00011: /* WFI */
1426 s->base.is_jmp = DISAS_WFI;
1427 break;
1428 case 0b00001: /* YIELD */
1429 /* When running in MTTCG we don't generate jumps to the yield and
1430 * WFE helpers as it won't affect the scheduling of other vCPUs.
1431 * If we wanted to more completely model WFE/SEV so we don't busy
1432 * spin unnecessarily we would need to do something more involved.
1433 */
1434 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1435 s->base.is_jmp = DISAS_YIELD;
1436 }
1437 break;
1438 case 0b00010: /* WFE */
1439 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1440 s->base.is_jmp = DISAS_WFE;
1441 }
1442 break;
1443 case 0b00100: /* SEV */
1444 case 0b00101: /* SEVL */
1445 /* we treat all as NOP at least for now */
1446 break;
1447 case 0b00111: /* XPACLRI */
1448 if (s->pauth_active) {
1449 gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1450 }
1451 break;
1452 case 0b01000: /* PACIA1716 */
1453 if (s->pauth_active) {
1454 gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1455 }
1456 break;
1457 case 0b01010: /* PACIB1716 */
1458 if (s->pauth_active) {
1459 gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1460 }
1461 break;
1462 case 0b01100: /* AUTIA1716 */
1463 if (s->pauth_active) {
1464 gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1465 }
1466 break;
1467 case 0b01110: /* AUTIB1716 */
1468 if (s->pauth_active) {
1469 gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1470 }
1471 break;
1472 case 0b11000: /* PACIAZ */
1473 if (s->pauth_active) {
1474 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1475 new_tmp_a64_zero(s));
1476 }
1477 break;
1478 case 0b11001: /* PACIASP */
1479 if (s->pauth_active) {
1480 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1481 }
1482 break;
1483 case 0b11010: /* PACIBZ */
1484 if (s->pauth_active) {
1485 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1486 new_tmp_a64_zero(s));
1487 }
1488 break;
1489 case 0b11011: /* PACIBSP */
1490 if (s->pauth_active) {
1491 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1492 }
1493 break;
1494 case 0b11100: /* AUTIAZ */
1495 if (s->pauth_active) {
1496 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1497 new_tmp_a64_zero(s));
1498 }
1499 break;
1500 case 0b11101: /* AUTIASP */
1501 if (s->pauth_active) {
1502 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1503 }
1504 break;
1505 case 0b11110: /* AUTIBZ */
1506 if (s->pauth_active) {
1507 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1508 new_tmp_a64_zero(s));
1509 }
1510 break;
1511 case 0b11111: /* AUTIBSP */
1512 if (s->pauth_active) {
1513 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1514 }
1515 break;
1516 default:
1517 /* default specified as NOP equivalent */
1518 break;
1519 }
1520 }
1521
1522 static void gen_clrex(DisasContext *s, uint32_t insn)
1523 {
1524 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1525 }
1526
1527 /* CLREX, DSB, DMB, ISB */
1528 static void handle_sync(DisasContext *s, uint32_t insn,
1529 unsigned int op1, unsigned int op2, unsigned int crm)
1530 {
1531 TCGBar bar;
1532
1533 if (op1 != 3) {
1534 unallocated_encoding(s);
1535 return;
1536 }
1537
1538 switch (op2) {
1539 case 2: /* CLREX */
1540 gen_clrex(s, insn);
1541 return;
1542 case 4: /* DSB */
1543 case 5: /* DMB */
1544 switch (crm & 3) {
1545 case 1: /* MBReqTypes_Reads */
1546 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1547 break;
1548 case 2: /* MBReqTypes_Writes */
1549 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1550 break;
1551 default: /* MBReqTypes_All */
1552 bar = TCG_BAR_SC | TCG_MO_ALL;
1553 break;
1554 }
1555 tcg_gen_mb(bar);
1556 return;
1557 case 6: /* ISB */
1558 /* We need to break the TB after this insn to execute
1559 * self-modified code correctly and also to take
1560 * any pending interrupts immediately.
1561 */
1562 reset_btype(s);
1563 gen_goto_tb(s, 0, s->base.pc_next);
1564 return;
1565
1566 case 7: /* SB */
1567 if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1568 goto do_unallocated;
1569 }
1570 /*
1571 * TODO: There is no speculation barrier opcode for TCG;
1572 * MB and end the TB instead.
1573 */
1574 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1575 gen_goto_tb(s, 0, s->base.pc_next);
1576 return;
1577
1578 default:
1579 do_unallocated:
1580 unallocated_encoding(s);
1581 return;
1582 }
1583 }
1584
1585 static void gen_xaflag(void)
1586 {
1587 TCGv_i32 z = tcg_temp_new_i32();
1588
1589 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1590
1591 /*
1592 * (!C & !Z) << 31
1593 * (!(C | Z)) << 31
1594 * ~((C | Z) << 31)
1595 * ~-(C | Z)
1596 * (C | Z) - 1
1597 */
1598 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1599 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1600
1601 /* !(Z & C) */
1602 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1603 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1604
1605 /* (!C & Z) << 31 -> -(Z & ~C) */
1606 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1607 tcg_gen_neg_i32(cpu_VF, cpu_VF);
1608
1609 /* C | Z */
1610 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1611
1612 tcg_temp_free_i32(z);
1613 }
1614
1615 static void gen_axflag(void)
1616 {
1617 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
1618 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
1619
1620 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1621 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1622
1623 tcg_gen_movi_i32(cpu_NF, 0);
1624 tcg_gen_movi_i32(cpu_VF, 0);
1625 }
1626
1627 /* MSR (immediate) - move immediate to processor state field */
1628 static void handle_msr_i(DisasContext *s, uint32_t insn,
1629 unsigned int op1, unsigned int op2, unsigned int crm)
1630 {
1631 TCGv_i32 t1;
1632 int op = op1 << 3 | op2;
1633
1634 /* End the TB by default, chaining is ok. */
1635 s->base.is_jmp = DISAS_TOO_MANY;
1636
1637 switch (op) {
1638 case 0x00: /* CFINV */
1639 if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1640 goto do_unallocated;
1641 }
1642 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1643 s->base.is_jmp = DISAS_NEXT;
1644 break;
1645
1646 case 0x01: /* XAFlag */
1647 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1648 goto do_unallocated;
1649 }
1650 gen_xaflag();
1651 s->base.is_jmp = DISAS_NEXT;
1652 break;
1653
1654 case 0x02: /* AXFlag */
1655 if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1656 goto do_unallocated;
1657 }
1658 gen_axflag();
1659 s->base.is_jmp = DISAS_NEXT;
1660 break;
1661
1662 case 0x03: /* UAO */
1663 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1664 goto do_unallocated;
1665 }
1666 if (crm & 1) {
1667 set_pstate_bits(PSTATE_UAO);
1668 } else {
1669 clear_pstate_bits(PSTATE_UAO);
1670 }
1671 t1 = tcg_const_i32(s->current_el);
1672 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1673 tcg_temp_free_i32(t1);
1674 break;
1675
1676 case 0x04: /* PAN */
1677 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1678 goto do_unallocated;
1679 }
1680 if (crm & 1) {
1681 set_pstate_bits(PSTATE_PAN);
1682 } else {
1683 clear_pstate_bits(PSTATE_PAN);
1684 }
1685 t1 = tcg_const_i32(s->current_el);
1686 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1687 tcg_temp_free_i32(t1);
1688 break;
1689
1690 case 0x05: /* SPSel */
1691 if (s->current_el == 0) {
1692 goto do_unallocated;
1693 }
1694 t1 = tcg_const_i32(crm & PSTATE_SP);
1695 gen_helper_msr_i_spsel(cpu_env, t1);
1696 tcg_temp_free_i32(t1);
1697 break;
1698
1699 case 0x1e: /* DAIFSet */
1700 t1 = tcg_const_i32(crm);
1701 gen_helper_msr_i_daifset(cpu_env, t1);
1702 tcg_temp_free_i32(t1);
1703 break;
1704
1705 case 0x1f: /* DAIFClear */
1706 t1 = tcg_const_i32(crm);
1707 gen_helper_msr_i_daifclear(cpu_env, t1);
1708 tcg_temp_free_i32(t1);
1709 /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
1710 s->base.is_jmp = DISAS_UPDATE_EXIT;
1711 break;
1712
1713 case 0x1c: /* TCO */
1714 if (dc_isar_feature(aa64_mte, s)) {
1715 /* Full MTE is enabled -- set the TCO bit as directed. */
1716 if (crm & 1) {
1717 set_pstate_bits(PSTATE_TCO);
1718 } else {
1719 clear_pstate_bits(PSTATE_TCO);
1720 }
1721 t1 = tcg_const_i32(s->current_el);
1722 gen_helper_rebuild_hflags_a64(cpu_env, t1);
1723 tcg_temp_free_i32(t1);
1724 /* Many factors, including TCO, go into MTE_ACTIVE. */
1725 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1726 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1727 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
1728 s->base.is_jmp = DISAS_NEXT;
1729 } else {
1730 goto do_unallocated;
1731 }
1732 break;
1733
1734 default:
1735 do_unallocated:
1736 unallocated_encoding(s);
1737 return;
1738 }
1739 }
1740
1741 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1742 {
1743 TCGv_i32 tmp = tcg_temp_new_i32();
1744 TCGv_i32 nzcv = tcg_temp_new_i32();
1745
1746 /* build bit 31, N */
1747 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1748 /* build bit 30, Z */
1749 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1750 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1751 /* build bit 29, C */
1752 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1753 /* build bit 28, V */
1754 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1755 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1756 /* generate result */
1757 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1758
1759 tcg_temp_free_i32(nzcv);
1760 tcg_temp_free_i32(tmp);
1761 }
1762
1763 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1764 {
1765 TCGv_i32 nzcv = tcg_temp_new_i32();
1766
1767 /* take NZCV from R[t] */
1768 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1769
1770 /* bit 31, N */
1771 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1772 /* bit 30, Z */
1773 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1774 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1775 /* bit 29, C */
1776 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1777 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1778 /* bit 28, V */
1779 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1780 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1781 tcg_temp_free_i32(nzcv);
1782 }
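/*
 * The NZCV value exchanged with the register uses the PSTATE layout:
 * N in bit 31, Z in bit 30, C in bit 29, V in bit 28.  E.g. writing
 * 0x60000000 via gen_set_nzcv() sets Z and C and clears N and V
 * (note that "Z set" is represented internally by cpu_ZF == 0).
 */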
1783
1784 /* MRS - move from system register
1785 * MSR (register) - move to system register
1786 * SYS
1787 * SYSL
1788 * These are all essentially the same insn in 'read' and 'write'
1789 * versions, with varying op0 fields.
1790 */
1791 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1792 unsigned int op0, unsigned int op1, unsigned int op2,
1793 unsigned int crn, unsigned int crm, unsigned int rt)
1794 {
1795 const ARMCPRegInfo *ri;
1796 TCGv_i64 tcg_rt;
1797
1798 ri = get_arm_cp_reginfo(s->cp_regs,
1799 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1800 crn, crm, op0, op1, op2));
1801
1802 if (!ri) {
1803 /* Unknown register; this might be a guest error or a QEMU
1804 * unimplemented feature.
1805 */
1806 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1807 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1808 isread ? "read" : "write", op0, op1, crn, crm, op2);
1809 unallocated_encoding(s);
1810 return;
1811 }
1812
1813 /* Check access permissions */
1814 if (!cp_access_ok(s->current_el, ri, isread)) {
1815 unallocated_encoding(s);
1816 return;
1817 }
1818
1819 if (ri->accessfn) {
1820 /* Emit code to perform further access permissions checks at
1821 * runtime; this may result in an exception.
1822 */
1823 TCGv_ptr tmpptr;
1824 TCGv_i32 tcg_syn, tcg_isread;
1825 uint32_t syndrome;
1826
1827 gen_a64_set_pc_im(s->pc_curr);
1828 tmpptr = tcg_const_ptr(ri);
1829 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1830 tcg_syn = tcg_const_i32(syndrome);
1831 tcg_isread = tcg_const_i32(isread);
1832 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1833 tcg_temp_free_ptr(tmpptr);
1834 tcg_temp_free_i32(tcg_syn);
1835 tcg_temp_free_i32(tcg_isread);
1836 } else if (ri->type & ARM_CP_RAISES_EXC) {
1837 /*
1838 * The readfn or writefn might raise an exception;
1839 * synchronize the CPU state in case it does.
1840 */
1841 gen_a64_set_pc_im(s->pc_curr);
1842 }
1843
1844 /* Handle special cases first */
1845 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1846 case ARM_CP_NOP:
1847 return;
1848 case ARM_CP_NZCV:
1849 tcg_rt = cpu_reg(s, rt);
1850 if (isread) {
1851 gen_get_nzcv(tcg_rt);
1852 } else {
1853 gen_set_nzcv(tcg_rt);
1854 }
1855 return;
1856 case ARM_CP_CURRENTEL:
1857 /* Reads as current EL value from pstate, which is
1858 * guaranteed to be constant by the tb flags.
1859 */
1860 tcg_rt = cpu_reg(s, rt);
1861 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1862 return;
1863 case ARM_CP_DC_ZVA:
1864 /* Writes clear the aligned block of memory which rt points into. */
1865 if (s->mte_active[0]) {
1866 TCGv_i32 t_desc;
1867 int desc = 0;
1868
1869 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
1870 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
1871 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
1872 t_desc = tcg_const_i32(desc);
1873
1874 tcg_rt = new_tmp_a64(s);
1875 gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt));
1876 tcg_temp_free_i32(t_desc);
1877 } else {
1878 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1879 }
1880 gen_helper_dc_zva(cpu_env, tcg_rt);
1881 return;
1882 case ARM_CP_DC_GVA:
1883 {
1884 TCGv_i64 clean_addr, tag;
1885
1886 /*
1887 * DC_GVA, like DC_ZVA, requires that we supply the original
1888 * pointer for an invalid page. Probe that address first.
1889 */
1890 tcg_rt = cpu_reg(s, rt);
1891 clean_addr = clean_data_tbi(s, tcg_rt);
1892 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
1893
1894 if (s->ata) {
1895 /* Extract the tag from the register to match STZGM. */
1896 tag = tcg_temp_new_i64();
1897 tcg_gen_shri_i64(tag, tcg_rt, 56);
1898 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1899 tcg_temp_free_i64(tag);
1900 }
1901 }
1902 return;
1903 case ARM_CP_DC_GZVA:
1904 {
1905 TCGv_i64 clean_addr, tag;
1906
1907 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
1908 tcg_rt = cpu_reg(s, rt);
1909 clean_addr = clean_data_tbi(s, tcg_rt);
1910 gen_helper_dc_zva(cpu_env, clean_addr);
1911
1912 if (s->ata) {
1913 /* Extract the tag from the register to match STZGM. */
1914 tag = tcg_temp_new_i64();
1915 tcg_gen_shri_i64(tag, tcg_rt, 56);
1916 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1917 tcg_temp_free_i64(tag);
1918 }
1919 }
1920 return;
1921 default:
1922 break;
1923 }
1924 if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1925 return;
1926 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1927 return;
1928 }
1929
1930 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1931 gen_io_start();
1932 }
1933
1934 tcg_rt = cpu_reg(s, rt);
1935
1936 if (isread) {
1937 if (ri->type & ARM_CP_CONST) {
1938 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1939 } else if (ri->readfn) {
1940 TCGv_ptr tmpptr;
1941 tmpptr = tcg_const_ptr(ri);
1942 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1943 tcg_temp_free_ptr(tmpptr);
1944 } else {
1945 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1946 }
1947 } else {
1948 if (ri->type & ARM_CP_CONST) {
1949 /* If not forbidden by access permissions, treat as WI */
1950 return;
1951 } else if (ri->writefn) {
1952 TCGv_ptr tmpptr;
1953 tmpptr = tcg_const_ptr(ri);
1954 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1955 tcg_temp_free_ptr(tmpptr);
1956 } else {
1957 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1958 }
1959 }
1960
1961 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1962 /* I/O operations must end the TB here (whether read or write) */
1963 s->base.is_jmp = DISAS_UPDATE_EXIT;
1964 }
1965 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1966 /*
1967          * A write to any coprocessor register that ends a TB
1968 * must rebuild the hflags for the next TB.
1969 */
1970 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1971 gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1972 tcg_temp_free_i32(tcg_el);
1973 /*
1974 * We default to ending the TB on a coprocessor register write,
1975 * but allow this to be suppressed by the register definition
1976 * (usually only necessary to work around guest bugs).
1977 */
1978 s->base.is_jmp = DISAS_UPDATE_EXIT;
1979 }
1980 }
1981
1982 /* System
1983 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1984 * +---------------------+---+-----+-----+-------+-------+-----+------+
1985 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1986 * +---------------------+---+-----+-----+-------+-------+-----+------+
1987 */
1988 static void disas_system(DisasContext *s, uint32_t insn)
1989 {
1990 unsigned int l, op0, op1, crn, crm, op2, rt;
1991 l = extract32(insn, 21, 1);
1992 op0 = extract32(insn, 19, 2);
1993 op1 = extract32(insn, 16, 3);
1994 crn = extract32(insn, 12, 4);
1995 crm = extract32(insn, 8, 4);
1996 op2 = extract32(insn, 5, 3);
1997 rt = extract32(insn, 0, 5);
1998
1999 if (op0 == 0) {
2000 if (l || rt != 31) {
2001 unallocated_encoding(s);
2002 return;
2003 }
2004 switch (crn) {
2005 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2006 handle_hint(s, insn, op1, op2, crm);
2007 break;
2008 case 3: /* CLREX, DSB, DMB, ISB */
2009 handle_sync(s, insn, op1, op2, crm);
2010 break;
2011 case 4: /* MSR (immediate) */
2012 handle_msr_i(s, insn, op1, op2, crm);
2013 break;
2014 default:
2015 unallocated_encoding(s);
2016 break;
2017 }
2018 return;
2019 }
2020 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2021 }
2022
2023 /* Exception generation
2024 *
2025 * 31 24 23 21 20 5 4 2 1 0
2026 * +-----------------+-----+------------------------+-----+----+
2027 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
2028  * +-----------------+-----+------------------------+-----+----+
2029 */
2030 static void disas_exc(DisasContext *s, uint32_t insn)
2031 {
2032 int opc = extract32(insn, 21, 3);
2033 int op2_ll = extract32(insn, 0, 5);
2034 int imm16 = extract32(insn, 5, 16);
2035 TCGv_i32 tmp;
2036
2037 switch (opc) {
2038 case 0:
2039 /* For SVC, HVC and SMC we advance the single-step state
2040 * machine before taking the exception. This is architecturally
2041 * mandated, to ensure that single-stepping a system call
2042 * instruction works properly.
2043 */
2044 switch (op2_ll) {
2045 case 1: /* SVC */
2046 gen_ss_advance(s);
2047 gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
2048 syn_aa64_svc(imm16), default_exception_el(s));
2049 break;
2050 case 2: /* HVC */
2051 if (s->current_el == 0) {
2052 unallocated_encoding(s);
2053 break;
2054 }
2055 /* The pre HVC helper handles cases when HVC gets trapped
2056 * as an undefined insn by runtime configuration.
2057 */
2058 gen_a64_set_pc_im(s->pc_curr);
2059 gen_helper_pre_hvc(cpu_env);
2060 gen_ss_advance(s);
2061 gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
2062 syn_aa64_hvc(imm16), 2);
2063 break;
2064 case 3: /* SMC */
2065 if (s->current_el == 0) {
2066 unallocated_encoding(s);
2067 break;
2068 }
2069 gen_a64_set_pc_im(s->pc_curr);
2070 tmp = tcg_const_i32(syn_aa64_smc(imm16));
2071 gen_helper_pre_smc(cpu_env, tmp);
2072 tcg_temp_free_i32(tmp);
2073 gen_ss_advance(s);
2074 gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
2075 syn_aa64_smc(imm16), 3);
2076 break;
2077 default:
2078 unallocated_encoding(s);
2079 break;
2080 }
2081 break;
2082 case 1:
2083 if (op2_ll != 0) {
2084 unallocated_encoding(s);
2085 break;
2086 }
2087 /* BRK */
2088 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2089 break;
2090 case 2:
2091 if (op2_ll != 0) {
2092 unallocated_encoding(s);
2093 break;
2094 }
2095 /* HLT. This has two purposes.
2096 * Architecturally, it is an external halting debug instruction.
2097          * Since QEMU doesn't implement external debug, we treat it as
2098          * the architecture requires when halting debug is disabled: it will UNDEF.
2099 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2100 */
2101 if (semihosting_enabled() && imm16 == 0xf000) {
2102 #ifndef CONFIG_USER_ONLY
2103 /* In system mode, don't allow userspace access to semihosting,
2104 * to provide some semblance of security (and for consistency
2105 * with our 32-bit semihosting).
2106 */
2107 if (s->current_el == 0) {
2108 unsupported_encoding(s, insn);
2109 break;
2110 }
2111 #endif
2112 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
2113 } else {
2114 unsupported_encoding(s, insn);
2115 }
2116 break;
2117 case 5:
2118 if (op2_ll < 1 || op2_ll > 3) {
2119 unallocated_encoding(s);
2120 break;
2121 }
2122 /* DCPS1, DCPS2, DCPS3 */
2123 unsupported_encoding(s, insn);
2124 break;
2125 default:
2126 unallocated_encoding(s);
2127 break;
2128 }
2129 }
2130
2131 /* Unconditional branch (register)
2132 * 31 25 24 21 20 16 15 10 9 5 4 0
2133 * +---------------+-------+-------+-------+------+-------+
2134 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
2135 * +---------------+-------+-------+-------+------+-------+
2136 */
2137 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
2138 {
2139 unsigned int opc, op2, op3, rn, op4;
2140 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */
2141 TCGv_i64 dst;
2142 TCGv_i64 modifier;
2143
2144 opc = extract32(insn, 21, 4);
2145 op2 = extract32(insn, 16, 5);
2146 op3 = extract32(insn, 10, 6);
2147 rn = extract32(insn, 5, 5);
2148 op4 = extract32(insn, 0, 5);
2149
2150 if (op2 != 0x1f) {
2151 goto do_unallocated;
2152 }
2153
2154 switch (opc) {
2155 case 0: /* BR */
2156 case 1: /* BLR */
2157 case 2: /* RET */
2158 btype_mod = opc;
2159 switch (op3) {
2160 case 0:
2161 /* BR, BLR, RET */
2162 if (op4 != 0) {
2163 goto do_unallocated;
2164 }
2165 dst = cpu_reg(s, rn);
2166 break;
2167
2168 case 2:
2169 case 3:
2170 if (!dc_isar_feature(aa64_pauth, s)) {
2171 goto do_unallocated;
2172 }
2173 if (opc == 2) {
2174 /* RETAA, RETAB */
2175 if (rn != 0x1f || op4 != 0x1f) {
2176 goto do_unallocated;
2177 }
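                     /* LR is authenticated using SP as the modifier. */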
2178 rn = 30;
2179 modifier = cpu_X[31];
2180 } else {
2181 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2182 if (op4 != 0x1f) {
2183 goto do_unallocated;
2184 }
2185 modifier = new_tmp_a64_zero(s);
2186 }
2187 if (s->pauth_active) {
2188 dst = new_tmp_a64(s);
2189 if (op3 == 2) {
2190 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2191 } else {
2192 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2193 }
2194 } else {
2195 dst = cpu_reg(s, rn);
2196 }
2197 break;
2198
2199 default:
2200 goto do_unallocated;
2201 }
2202 gen_a64_set_pc(s, dst);
2203 /* BLR also needs to load return address */
2204 if (opc == 1) {
2205 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2206 }
2207 break;
2208
2209 case 8: /* BRAA */
2210 case 9: /* BLRAA */
2211 if (!dc_isar_feature(aa64_pauth, s)) {
2212 goto do_unallocated;
2213 }
2214 if ((op3 & ~1) != 2) {
2215 goto do_unallocated;
2216 }
2217 btype_mod = opc & 1;
2218 if (s->pauth_active) {
2219 dst = new_tmp_a64(s);
2220 modifier = cpu_reg_sp(s, op4);
2221 if (op3 == 2) {
2222 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2223 } else {
2224 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2225 }
2226 } else {
2227 dst = cpu_reg(s, rn);
2228 }
2229 gen_a64_set_pc(s, dst);
2230 /* BLRAA also needs to load return address */
2231 if (opc == 9) {
2232 tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2233 }
2234 break;
2235
2236 case 4: /* ERET */
2237 if (s->current_el == 0) {
2238 goto do_unallocated;
2239 }
2240 switch (op3) {
2241 case 0: /* ERET */
2242 if (op4 != 0) {
2243 goto do_unallocated;
2244 }
2245 dst = tcg_temp_new_i64();
2246 tcg_gen_ld_i64(dst, cpu_env,
2247 offsetof(CPUARMState, elr_el[s->current_el]));
2248 break;
2249
2250 case 2: /* ERETAA */
2251 case 3: /* ERETAB */
2252 if (!dc_isar_feature(aa64_pauth, s)) {
2253 goto do_unallocated;
2254 }
2255 if (rn != 0x1f || op4 != 0x1f) {
2256 goto do_unallocated;
2257 }
2258 dst = tcg_temp_new_i64();
2259 tcg_gen_ld_i64(dst, cpu_env,
2260 offsetof(CPUARMState, elr_el[s->current_el]));
2261 if (s->pauth_active) {
2262 modifier = cpu_X[31];
2263 if (op3 == 2) {
2264 gen_helper_autia(dst, cpu_env, dst, modifier);
2265 } else {
2266 gen_helper_autib(dst, cpu_env, dst, modifier);
2267 }
2268 }
2269 break;
2270
2271 default:
2272 goto do_unallocated;
2273 }
2274 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2275 gen_io_start();
2276 }
2277
2278 gen_helper_exception_return(cpu_env, dst);
2279 tcg_temp_free_i64(dst);
2280 /* Must exit loop to check un-masked IRQs */
2281 s->base.is_jmp = DISAS_EXIT;
2282 return;
2283
2284 case 5: /* DRPS */
2285 if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2286 goto do_unallocated;
2287 } else {
2288 unsupported_encoding(s, insn);
2289 }
2290 return;
2291
2292 default:
2293 do_unallocated:
2294 unallocated_encoding(s);
2295 return;
2296 }
2297
2298 switch (btype_mod) {
2299 case 0: /* BR */
2300 if (dc_isar_feature(aa64_bti, s)) {
2301 /* BR to {x16,x17} or !guard -> 1, else 3. */
2302 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2303 }
2304 break;
2305
2306 case 1: /* BLR */
2307 if (dc_isar_feature(aa64_bti, s)) {
2308 /* BLR sets BTYPE to 2, regardless of source guarded page. */
2309 set_btype(s, 2);
2310 }
2311 break;
2312
2313 default: /* RET or none of the above. */
2314 /* BTYPE will be set to 0 by normal end-of-insn processing. */
2315 break;
2316 }
2317
2318 s->base.is_jmp = DISAS_JUMP;
2319 }
2320
2321 /* Branches, exception generating and system instructions */
2322 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2323 {
2324 switch (extract32(insn, 25, 7)) {
2325 case 0x0a: case 0x0b:
2326 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2327 disas_uncond_b_imm(s, insn);
2328 break;
2329 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2330 disas_comp_b_imm(s, insn);
2331 break;
2332 case 0x1b: case 0x5b: /* Test & branch (immediate) */
2333 disas_test_b_imm(s, insn);
2334 break;
2335 case 0x2a: /* Conditional branch (immediate) */
2336 disas_cond_b_imm(s, insn);
2337 break;
2338 case 0x6a: /* Exception generation / System */
2339 if (insn & (1 << 24)) {
2340 if (extract32(insn, 22, 2) == 0) {
2341 disas_system(s, insn);
2342 } else {
2343 unallocated_encoding(s);
2344 }
2345 } else {
2346 disas_exc(s, insn);
2347 }
2348 break;
2349 case 0x6b: /* Unconditional branch (register) */
2350 disas_uncond_b_reg(s, insn);
2351 break;
2352 default:
2353 unallocated_encoding(s);
2354 break;
2355 }
2356 }
2357
2358 /*
2359 * Load/Store exclusive instructions are implemented by remembering
2360 * the value/address loaded, and seeing if these are the same
2361 * when the store is performed. This is not actually the architecturally
2362 * mandated semantics, but it works for typical guest code sequences
2363 * and avoids having to monitor regular stores.
2364 *
2365 * The store exclusive uses the atomic cmpxchg primitives to avoid
2366 * races in multi-threaded linux-user and when MTTCG softmmu is
2367 * enabled.
2368 */
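     /*
      * For the typical guest retry sequence
      *     loop: ldxr  x0, [x1]
      *           ...
      *           stxr  w2, x0, [x1]
      *           cbnz  w2, loop
      * the store-exclusive succeeds when the remembered value still matches
      * memory; an intervening store of an identical value is not detected,
      * which is the relaxation of the architected semantics noted above.
      */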
2369 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2370 TCGv_i64 addr, int size, bool is_pair)
2371 {
2372 int idx = get_mem_index(s);
2373 MemOp memop = s->be_data;
2374
2375 g_assert(size <= 3);
2376 if (is_pair) {
2377 g_assert(size >= 2);
2378 if (size == 2) {
2379 /* The pair must be single-copy atomic for the doubleword. */
2380 memop |= MO_64 | MO_ALIGN;
2381 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
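                 /* Split the loaded pair into Rt/Rt2 according to endianness. */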
2382 if (s->be_data == MO_LE) {
2383 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2384 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2385 } else {
2386 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2387 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2388 }
2389 } else {
2390 /* The pair must be single-copy atomic for *each* doubleword, not
2391                the entire quadword; however, it must be quadword aligned. */
2392 memop |= MO_64;
2393 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2394 memop | MO_ALIGN_16);
2395
2396 TCGv_i64 addr2 = tcg_temp_new_i64();
2397 tcg_gen_addi_i64(addr2, addr, 8);
2398 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2399 tcg_temp_free_i64(addr2);
2400
2401 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2402 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2403 }
2404 } else {
2405 memop |= size | MO_ALIGN;
2406 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2407 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2408 }
2409 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2410 }
2411
2412 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2413 TCGv_i64 addr, int size, int is_pair)
2414 {
2415 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2416 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2417 * [addr] = {Rt};
2418 * if (is_pair) {
2419 * [addr + datasize] = {Rt2};
2420 * }
2421 * {Rd} = 0;
2422 * } else {
2423 * {Rd} = 1;
2424 * }
2425 * env->exclusive_addr = -1;
2426 */
2427 TCGLabel *fail_label = gen_new_label();
2428 TCGLabel *done_label = gen_new_label();
2429 TCGv_i64 tmp;
2430
2431 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2432
2433 tmp = tcg_temp_new_i64();
2434 if (is_pair) {
2435 if (size == 2) {
2436 if (s->be_data == MO_LE) {
2437 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2438 } else {
2439 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2440 }
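                 /*
                  * cmpxchg yields the old memory value: the store happened iff
                  * it equals cpu_exclusive_val, so setcond produces the 0
                  * (success) or 1 (failure) status that is written to Rd.
                  */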
2441 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2442 cpu_exclusive_val, tmp,
2443 get_mem_index(s),
2444 MO_64 | MO_ALIGN | s->be_data);
2445 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2446 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2447 if (!HAVE_CMPXCHG128) {
2448 gen_helper_exit_atomic(cpu_env);
2449 s->base.is_jmp = DISAS_NORETURN;
2450 } else if (s->be_data == MO_LE) {
2451 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2452 cpu_exclusive_addr,
2453 cpu_reg(s, rt),
2454 cpu_reg(s, rt2));
2455 } else {
2456 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2457 cpu_exclusive_addr,
2458 cpu_reg(s, rt),
2459 cpu_reg(s, rt2));
2460 }
2461 } else if (s->be_data == MO_LE) {
2462 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2463 cpu_reg(s, rt), cpu_reg(s, rt2));
2464 } else {
2465 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2466 cpu_reg(s, rt), cpu_reg(s, rt2));
2467 }
2468 } else {
2469 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2470 cpu_reg(s, rt), get_mem_index(s),
2471 size | MO_ALIGN | s->be_data);
2472 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2473 }
2474 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2475 tcg_temp_free_i64(tmp);
2476 tcg_gen_br(done_label);
2477
2478 gen_set_label(fail_label);
2479 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2480 gen_set_label(done_label);
2481 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2482 }
2483
2484 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2485 int rn, int size)
2486 {
2487 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2488 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2489 int memidx = get_mem_index(s);
2490 TCGv_i64 clean_addr;
2491
2492 if (rn == 31) {
2493 gen_check_sp_alignment(s);
2494 }
2495 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
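         /* cmpxchg returns the old memory value, which CAS writes back to Rs. */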
2496 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2497 size | MO_ALIGN | s->be_data);
2498 }
2499
2500 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2501 int rn, int size)
2502 {
2503 TCGv_i64 s1 = cpu_reg(s, rs);
2504 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2505 TCGv_i64 t1 = cpu_reg(s, rt);
2506 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2507 TCGv_i64 clean_addr;
2508 int memidx = get_mem_index(s);
2509
2510 if (rn == 31) {
2511 gen_check_sp_alignment(s);
2512 }
2513
2514 /* This is a single atomic access, despite the "pair". */
2515 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
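         /* size + 1: the tag check covers both registers of the pair. */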
2516
2517 if (size == 2) {
2518 TCGv_i64 cmp = tcg_temp_new_i64();
2519 TCGv_i64 val = tcg_temp_new_i64();
2520
2521 if (s->be_data == MO_LE) {
2522 tcg_gen_concat32_i64(val, t1, t2);
2523 tcg_gen_concat32_i64(cmp, s1, s2);
2524 } else {
2525 tcg_gen_concat32_i64(val, t2, t1);
2526 tcg_gen_concat32_i64(cmp, s2, s1);
2527 }
2528
2529 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2530 MO_64 | MO_ALIGN | s->be_data);
2531 tcg_temp_free_i64(val);
2532
2533 if (s->be_data == MO_LE) {
2534 tcg_gen_extr32_i64(s1, s2, cmp);
2535 } else {
2536 tcg_gen_extr32_i64(s2, s1, cmp);
2537 }
2538 tcg_temp_free_i64(cmp);
2539 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2540 if (HAVE_CMPXCHG128) {
2541 TCGv_i32 tcg_rs = tcg_const_i32(rs);
2542 if (s->be_data == MO_LE) {
2543 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2544 clean_addr, t1, t2);
2545 } else {
2546 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2547 clean_addr, t1, t2);
2548 }
2549 tcg_temp_free_i32(tcg_rs);
2550 } else {
2551 gen_helper_exit_atomic(cpu_env);
2552 s->base.is_jmp = DISAS_NORETURN;
2553 }
2554 } else {
2555 TCGv_i64 d1 = tcg_temp_new_i64();
2556 TCGv_i64 d2 = tcg_temp_new_i64();
2557 TCGv_i64 a2 = tcg_temp_new_i64();
2558 TCGv_i64 c1 = tcg_temp_new_i64();
2559 TCGv_i64 c2 = tcg_temp_new_i64();
2560 TCGv_i64 zero = tcg_const_i64(0);
2561
2562 /* Load the two words, in memory order. */
2563 tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2564 MO_64 | MO_ALIGN_16 | s->be_data);
2565 tcg_gen_addi_i64(a2, clean_addr, 8);
2566 tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2567
2568 /* Compare the two words, also in memory order. */
2569 tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2570 tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2571 tcg_gen_and_i64(c2, c2, c1);
2572
2573 /* If compare equal, write back new data, else write back old data. */
2574 tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2575 tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2576 tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2577 tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2578 tcg_temp_free_i64(a2);
2579 tcg_temp_free_i64(c1);
2580 tcg_temp_free_i64(c2);
2581 tcg_temp_free_i64(zero);
2582
2583 /* Write back the data from memory to Rs. */
2584 tcg_gen_mov_i64(s1, d1);
2585 tcg_gen_mov_i64(s2, d2);
2586 tcg_temp_free_i64(d1);
2587 tcg_temp_free_i64(d2);
2588 }
2589 }
2590
2591 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2592 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2593 */
2594 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2595 {
2596 int opc0 = extract32(opc, 0, 1);
2597 int regsize;
2598
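         /*
          * Signed loads use opc<0> to select a 32-bit (1) or 64-bit (0)
          * destination; unsigned loads use a 64-bit register only for
          * doubleword (size == 3) accesses.
          */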
2599 if (is_signed) {
2600 regsize = opc0 ? 32 : 64;
2601 } else {
2602 regsize = size == 3 ? 64 : 32;
2603 }
2604 return regsize == 64;
2605 }
2606
2607 /* Load/store exclusive
2608 *
2609 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
2610 * +-----+-------------+----+---+----+------+----+-------+------+------+
2611 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
2612 * +-----+-------------+----+---+----+------+----+-------+------+------+
2613 *
2614 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2615 * L: 0 -> store, 1 -> load
2616 * o2: 0 -> exclusive, 1 -> not
2617 * o1: 0 -> single register, 1 -> register pair
2618 * o0: 1 -> load-acquire/store-release, 0 -> not
2619 */
2620 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2621 {
2622 int rt = extract32(insn, 0, 5);
2623 int rn = extract32(insn, 5, 5);
2624 int rt2 = extract32(insn, 10, 5);
2625 int rs = extract32(insn, 16, 5);
2626 int is_lasr = extract32(insn, 15, 1);
2627 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2628 int size = extract32(insn, 30, 2);
2629 TCGv_i64 clean_addr;
2630
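         /* o2:L:o1:o0 packed into one 4-bit selector for the cases below. */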
2631 switch (o2_L_o1_o0) {
2632 case 0x0: /* STXR */
2633 case 0x1: /* STLXR */
2634 if (rn == 31) {
2635 gen_check_sp_alignment(s);
2636 }
2637 if (is_lasr) {
2638 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2639 }
2640 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2641 true, rn != 31, size);
2642 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2643 return;
2644
2645 case 0x4: /* LDXR */
2646 case 0x5: /* LDAXR */
2647 if (rn == 31) {
2648 gen_check_sp_alignment(s);
2649 }
2650 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2651 false, rn != 31, size);
2652 s->is_ldex = true;
2653 gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2654 if (is_lasr) {
2655 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2656 }
2657 return;
2658
2659 case 0x8: /* STLLR */
2660 if (!dc_isar_feature(aa64_lor, s)) {
2661 break;
2662 }
2663 /* StoreLORelease is the same as Store-Release for QEMU. */
2664 /* fall through */
2665 case 0x9: /* STLR */
2666 /* Generate ISS for non-exclusive accesses including LASR. */
2667 if (rn == 31) {
2668 gen_check_sp_alignment(s);
2669 }
2670 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2671 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2672 true, rn != 31, size);
2673 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2674 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2675 return;
2676
2677 case 0xc: /* LDLAR */
2678 if (!dc_isar_feature(aa64_lor, s)) {
2679 break;
2680 }
2681 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2682 /* fall through */
2683 case 0xd: /* LDAR */
2684 /* Generate ISS for non-exclusive accesses including LASR. */
2685 if (rn == 31) {
2686 gen_check_sp_alignment(s);
2687 }
2688 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2689 false, rn != 31, size);
2690 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2691 disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2692 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2693 return;
2694
2695 case 0x2: case 0x3: /* CASP / STXP */
2696 if (size & 2) { /* STXP / STLXP */
2697 if (rn == 31) {
2698 gen_check_sp_alignment(s);
2699 }
2700 if (is_lasr) {
2701 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2702 }
2703 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2704 true, rn != 31, size);
2705 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2706 return;
2707 }
2708 if (rt2 == 31
2709 && ((rt | rs) & 1) == 0
2710 && dc_isar_feature(aa64_atomics, s)) {
2711 /* CASP / CASPL */
2712 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2713 return;
2714 }
2715 break;
2716
2717 case 0x6: case 0x7: /* CASPA / LDXP */
2718 if (size & 2) { /* LDXP / LDAXP */
2719 if (rn == 31) {
2720 gen_check_sp_alignment(s);
2721 }
2722 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2723 false, rn != 31, size);
2724 s->is_ldex = true;
2725 gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2726 if (is_lasr) {
2727 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2728 }
2729 return;
2730 }
2731 if (rt2 == 31
2732 && ((rt | rs) & 1) == 0
2733 && dc_isar_feature(aa64_atomics, s)) {
2734 /* CASPA / CASPAL */
2735 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2736 return;
2737 }
2738 break;
2739
2740 case 0xa: /* CAS */
2741 case 0xb: /* CASL */
2742 case 0xe: /* CASA */
2743 case 0xf: /* CASAL */
2744 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2745 gen_compare_and_swap(s, rs, rt, rn, size);
2746 return;
2747 }
2748 break;
2749 }
2750 unallocated_encoding(s);
2751 }
2752
2753 /*
2754 * Load register (literal)
2755 *
2756 * 31 30 29 27 26 25 24 23 5 4 0
2757 * +-----+-------+---+-----+-------------------+-------+
2758 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2759 * +-----+-------+---+-----+-------------------+-------+
2760 *
2761 * V: 1 -> vector (simd/fp)
2762 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2763 * 10-> 32 bit signed, 11 -> prefetch
2764  *                   10 -> 32 bit signed, 11 -> prefetch
2765 */
2766 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2767 {
2768 int rt = extract32(insn, 0, 5);
2769 int64_t imm = sextract32(insn, 5, 19) << 2;
2770 bool is_vector = extract32(insn, 26, 1);
2771 int opc = extract32(insn, 30, 2);
2772 bool is_signed = false;
2773 int size = 2;
2774 TCGv_i64 tcg_rt, clean_addr;
2775
2776 if (is_vector) {
2777 if (opc == 3) {
2778 unallocated_encoding(s);
2779 return;
2780 }
2781 size = 2 + opc;
2782 if (!fp_access_check(s)) {
2783 return;
2784 }
2785 } else {
2786 if (opc == 3) {
2787 /* PRFM (literal) : prefetch */
2788 return;
2789 }
2790 size = 2 + extract32(opc, 0, 1);
2791 is_signed = extract32(opc, 1, 1);
2792 }
2793
2794 tcg_rt = cpu_reg(s, rt);
2795
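         /* The literal address is PC-relative; imm19 is a word offset, scaled above. */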
2796 clean_addr = tcg_const_i64(s->pc_curr + imm);
2797 if (is_vector) {
2798 do_fp_ld(s, rt, clean_addr, size);
2799 } else {
2800 /* Only unsigned 32bit loads target 32bit registers. */
2801 bool iss_sf = opc != 0;
2802
2803 do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2804 true, rt, iss_sf, false);
2805 }
2806 tcg_temp_free_i64(clean_addr);
2807 }
2808
2809 /*
2810 * LDNP (Load Pair - non-temporal hint)
2811 * LDP (Load Pair - non vector)
2812 * LDPSW (Load Pair Signed Word - non vector)
2813 * STNP (Store Pair - non-temporal hint)
2814 * STP (Store Pair - non vector)
2815 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2816 * LDP (Load Pair of SIMD&FP)
2817 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2818 * STP (Store Pair of SIMD&FP)
2819 *
2820 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
2821  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2822  * | opc | 1 0 1 | V | 0 | index | L | imm7  |  Rt2  |  Rn  |  Rt  |
2823  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2824 *
2825 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2826 * LDPSW/STGP 01
2827 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2828 * V: 0 -> GPR, 1 -> Vector
2829 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2830 * 10 -> signed offset, 11 -> pre-index
2831 * L: 0 -> Store 1 -> Load
2832 *
2833 * Rt, Rt2 = GPR or SIMD registers to be stored
2834 * Rn = general purpose register containing address
2835 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2836 */
2837 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2838 {
2839 int rt = extract32(insn, 0, 5);
2840 int rn = extract32(insn, 5, 5);
2841 int rt2 = extract32(insn, 10, 5);
2842 uint64_t offset = sextract64(insn, 15, 7);
2843 int index = extract32(insn, 23, 2);
2844 bool is_vector = extract32(insn, 26, 1);
2845 bool is_load = extract32(insn, 22, 1);
2846 int opc = extract32(insn, 30, 2);
2847
2848 bool is_signed = false;
2849 bool postindex = false;
2850 bool wback = false;
2851 bool set_tag = false;
2852
2853 TCGv_i64 clean_addr, dirty_addr;
2854
2855 int size;
2856
2857 if (opc == 3) {
2858 unallocated_encoding(s);
2859 return;
2860 }
2861
2862 if (is_vector) {
2863 size = 2 + opc;
2864 } else if (opc == 1 && !is_load) {
2865 /* STGP */
2866 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2867 unallocated_encoding(s);
2868 return;
2869 }
2870 size = 3;
2871 set_tag = true;
2872 } else {
2873 size = 2 + extract32(opc, 1, 1);
2874 is_signed = extract32(opc, 0, 1);
2875 if (!is_load && is_signed) {
2876 unallocated_encoding(s);
2877 return;
2878 }
2879 }
2880
2881 switch (index) {
2882 case 1: /* post-index */
2883 postindex = true;
2884 wback = true;
2885 break;
2886 case 0:
2887         /* Signed offset with "non-temporal" hint. Since we don't emulate
2888          * caches, we don't care about hints to the cache system about
2889          * data access patterns, and handle this identically to a plain
2890          * signed offset.
2891 */
2892 if (is_signed) {
2893 /* There is no non-temporal-hint version of LDPSW */
2894 unallocated_encoding(s);
2895 return;
2896 }
2897 postindex = false;
2898 break;
2899 case 2: /* signed offset, rn not updated */
2900 postindex = false;
2901 break;
2902 case 3: /* pre-index */
2903 postindex = false;
2904 wback = true;
2905 break;
2906 }
2907
2908 if (is_vector && !fp_access_check(s)) {
2909 return;
2910 }
2911
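         /* Scale imm7 by the access size, or by the 16-byte tag granule for STGP. */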
2912 offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2913
2914 if (rn == 31) {
2915 gen_check_sp_alignment(s);
2916 }
2917
2918 dirty_addr = read_cpu_reg_sp(s, rn, 1);
2919 if (!postindex) {
2920 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2921 }
2922
2923 if (set_tag) {
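             /*
              * STGP takes the allocation tag from the address register itself,
              * which is why dirty_addr is passed as both the location and the
              * tag source below.
              */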
2924 if (!s->ata) {
2925 /*
2926 * TODO: We could rely on the stores below, at least for
2927 * system mode, if we arrange to add MO_ALIGN_16.
2928 */
2929 gen_helper_stg_stub(cpu_env, dirty_addr);
2930 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2931 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2932 } else {
2933 gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2934 }
2935 }
2936
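         /* The transfer is two registers of (1 << size) bytes: 2 << size total. */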
2937 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2938 (wback || rn != 31) && !set_tag,
2939 size, 2 << size);
2940
2941 if (is_vector) {
2942 if (is_load) {
2943 do_fp_ld(s, rt, clean_addr, size);
2944 } else {
2945 do_fp_st(s, rt, clean_addr, size);
2946 }
2947 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2948 if (is_load) {
2949 do_fp_ld(s, rt2, clean_addr, size);
2950 } else {
2951 do_fp_st(s, rt2, clean_addr, size);
2952 }
2953 } else {
2954 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2955 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2956
2957 if (is_load) {
2958 TCGv_i64 tmp = tcg_temp_new_i64();
2959
2960 /* Do not modify tcg_rt before recognizing any exception
2961 * from the second load.
2962 */
2963 do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2964 false, 0, false, false);
2965 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2966 do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2967 false, 0, false, false);
2968
2969 tcg_gen_mov_i64(tcg_rt, tmp);
2970 tcg_temp_free_i64(tmp);
2971 } else {
2972 do_gpr_st(s, tcg_rt, clean_addr, size,
2973 false, 0, false, false);
2974 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2975 do_gpr_st(s, tcg_rt2, clean_addr, size,
2976 false, 0, false, false);
2977 }
2978 }
2979
2980 if (wback) {
2981 if (postindex) {
2982 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2983 }
2984 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2985 }
2986 }
2987
2988 /*
2989 * Load/store (immediate post-indexed)
2990 * Load/store (immediate pre-indexed)
2991 * Load/store (unscaled immediate)
2992 *
2993 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2994 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2995 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2996 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2997 *
2998 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2999  *       10 -> unprivileged
3000 * V = 0 -> non-vector
3001 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3002 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3003 */
3004 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3005 int opc,
3006 int size,
3007 int rt,
3008 bool is_vector)
3009 {
3010 int rn = extract32(insn, 5, 5);
3011 int imm9 = sextract32(insn, 12, 9);
3012 int idx = extract32(insn, 10, 2);
3013 bool is_signed = false;
3014 bool is_store = false;
3015 bool is_extended = false;
3016 bool is_unpriv = (idx == 2);
3017 bool iss_valid = !is_vector;
3018 bool post_index;
3019 bool writeback;
3020 int memidx;
3021
3022 TCGv_i64 clean_addr, dirty_addr;
3023
3024 if (is_vector) {
3025 size |= (opc & 2) << 1;
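             /* For SIMD/FP, opc<1> supplies size<2>; 100 selects a 128-bit access. */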
3026 if (size > 4 || is_unpriv) {
3027 unallocated_encoding(s);
3028 return;
3029 }
3030 is_store = ((opc & 1) == 0);
3031 if (!fp_access_check(s)) {
3032 return;
3033 }
3034 } else {
3035 if (size == 3 && opc == 2) {
3036 /* PRFM - prefetch */
3037 if (idx != 0) {
3038 unallocated_encoding(s);
3039 return;
3040 }
3041 return;
3042 }
3043 if (opc == 3 && size > 1) {
3044 unallocated_encoding(s);
3045 return;
3046 }
3047 is_store = (opc == 0);
3048 is_signed = extract32(opc, 1, 1);
3049 is_extended = (size < 3) && extract32(opc, 0, 1);
3050 }
3051
3052 switch (idx) {
3053 case 0:
3054 case 2:
3055 post_index = false;
3056 writeback = false;
3057 break;
3058 case 1:
3059 post_index = true;
3060 writeback = true;
3061 break;
3062 case 3:
3063 post_index = false;
3064 writeback = true;
3065 break;
3066 default:
3067 g_assert_not_reached();
3068 }
3069
3070 if (rn == 31) {
3071 gen_check_sp_alignment(s);
3072 }
3073
3074 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3075 if (!post_index) {
3076 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3077 }
3078
3079 memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3080 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3081 writeback || rn != 31,
3082 size, is_unpriv, memidx);
3083
3084 if (is_vector) {
3085 if (is_store) {
3086 do_fp_st(s, rt, clean_addr, size);
3087 } else {
3088 do_fp_ld(s, rt, clean_addr, size);
3089 }
3090 } else {
3091 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3092 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3093
3094 if (is_store) {
3095 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3096 iss_valid, rt, iss_sf, false);
3097 } else {
3098 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
3099 is_signed, is_extended, memidx,
3100 iss_valid, rt, iss_sf, false);
3101 }
3102 }
3103
3104 if (writeback) {
3105 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3106 if (post_index) {
3107 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3108 }
3109 tcg_gen_mov_i64(tcg_rn, dirty_addr);
3110 }
3111 }
3112
3113 /*
3114 * Load/store (register offset)
3115 *
3116 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
3117 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3118 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
3119 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3120 *
3121 * For non-vector:
3122 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3123 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3124 * For vector:
3125 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3126 * opc<0>: 0 -> store, 1 -> load
3127 * V: 1 -> vector/simd
3128 * opt: extend encoding (see DecodeRegExtend)
3129 * S: if S=1 then scale (essentially index by sizeof(size))
3130 * Rt: register to transfer into/out of
3131 * Rn: address register or SP for base
3132 * Rm: offset register or ZR for offset
3133 */
3134 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3135 int opc,
3136 int size,
3137 int rt,
3138 bool is_vector)
3139 {
3140 int rn = extract32(insn, 5, 5);
3141 int shift = extract32(insn, 12, 1);
3142 int rm = extract32(insn, 16, 5);
3143 int opt = extract32(insn, 13, 3);
3144 bool is_signed = false;
3145 bool is_store = false;
3146 bool is_extended = false;
3147
3148 TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3149
3150 if (extract32(opt, 1, 1) == 0) {
3151 unallocated_encoding(s);
3152 return;
3153 }
3154
3155 if (is_vector) {
3156 size |= (opc & 2) << 1;
3157 if (size > 4) {
3158 unallocated_encoding(s);
3159 return;
3160 }
3161 is_store = !extract32(opc, 0, 1);
3162 if (!fp_access_check(s)) {
3163 return;
3164 }
3165 } else {
3166 if (size == 3 && opc == 2) {
3167 /* PRFM - prefetch */
3168 return;
3169 }
3170 if (opc == 3 && size > 1) {
3171 unallocated_encoding(s);
3172 return;
3173 }
3174 is_store = (opc == 0);
3175 is_signed = extract32(opc, 1, 1);
3176 is_extended = (size < 3) && extract32(opc, 0, 1);
3177 }
3178
3179 if (rn == 31) {
3180 gen_check_sp_alignment(s);
3181 }
3182 dirty_addr = read_cpu_reg_sp(s, rn, 1);
3183
3184 tcg_rm = read_cpu_reg(s, rm, 1);
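         /* Extend Rm per option<2:0>; if S is set, also scale by the access size. */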
3185 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3186
3187 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3188 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3189
3190 if (is_vector) {
3191 if (is_store) {
3192 do_fp_st(s, rt, clean_addr, size);
3193 } else {
3194 do_fp_ld(s, rt, clean_addr, size);
3195 }