target/arm: Implement SVE load and broadcast element
[qemu.git] / target/arm/translate-sve.c
/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"


typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
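/* The field is encoded as tsz:imm3.  The element size is implied by the
 * position of the topmost set bit of the 4-bit tsz: 0001 -> byte,
 * 001x -> half, 01xx -> word, 1xxx -> double; tsz == 0000 has no set
 * bit, so the result is -1.
 */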
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

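/* See e.g. LSR (immediate, predicated): the encoding holds
 * (2 * esize) - shift, so the decoded shift is in [1, esize].
 */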
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
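/* E.g. the signed form expands 0x0ff to -1 and 0x1ff to -256. */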
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  Cf. SVE Memory Contiguous Load Group.
 */
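/* With unsigned data, dtype = msz * 5: the diagonal of the dtype table. */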
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
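/* The helper result packs N into bit 31, !Z into bit 1 and C into bit 0;
 * QEMU keeps the Z flag inverted in cpu_ZF (Z is set iff cpu_ZF == 0).
 */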
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

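    /* The word count goes in via t; the helper returns the packed
     * flags in the same temp. */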
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_3 * const fns[4] = {                          \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_zpz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for
 * two- and three-operand predicated instructions for which logic
 * dictates a zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                          \
    static gen_helper_gvec_5 * const fns[4] = {                            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,              \
    };                                                                     \
    return do_zpzzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

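/* Generate Zd = start + n * incr for element index n.  The 64-bit
 * element size has its own helper; smaller sizes truncate the scalar
 * operands to 32 bits first.
 */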
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
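/* E.g. fullsz = 32 bytes (256-bit VL) at esz = MO_16 gives 16 elements:
 * POW2 -> 16, VL7 -> 7, MUL3 -> 15, VL32 -> 0 (bound exceeds elements).
 */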
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
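    /* Clamp: if the result crossed the bound, replace it with the bound. */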
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
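            /* Overflow occurs iff the operands have different signs and
             * the sign of the result differs from the minuend: the sign
             * bit of (reg ^ val) & (reg ^ t1). */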
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
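            /* Overflow occurs iff the addends have the same sign
             * (~(reg ^ val)) and the sign of the sum differs from
             * theirs (result ^ val). */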
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand. */
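/* Saturating decrement is implemented by negating the operand and
 * reusing the saturating-increment helpers; only unsigned 64-bit
 * needs a dedicated uqsubi helper, since the negated value cannot
 * be represented.
 */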
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

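/* The immediate reuses the A64 logical-immediate (bitmask) encoding,
 * via the shared logic_imm_decode_wmask decoder; an unencodable mask
 * is an unallocated encoding.
 */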
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
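    /* EXT concatenates bytes imm..vsz-1 of Zn with bytes 0..imm-1 of Zm. */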
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);
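        /* The lowest set bit of imm selects the element size; the bits
         * above it give the element index. */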
2026
2027 if ((index << esz) < vsz) {
2028 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2029 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2030 } else {
2031 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2032 }
2033 }
2034 return true;
2035 }
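/* The immediate is the concatenated imm2:tsz field.  The lowest set
 * bit of tsz encodes the element size and the bits above it the
 * index: e.g. imm == 0b01100 gives esz == 2 and index == 1, i.e.
 * DUP z0.s, z1.s[1].  An index at or beyond the vector length
 * architecturally yields zero, handled by the dup64i(..., 0) arm.
 */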
2036
2037 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2038 {
2039 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2040 static gen_insr * const fns[4] = {
2041 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2042 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2043 };
2044 unsigned vsz = vec_full_reg_size(s);
2045 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2046 TCGv_ptr t_zd = tcg_temp_new_ptr();
2047 TCGv_ptr t_zn = tcg_temp_new_ptr();
2048
2049 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2050 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2051
2052 fns[a->esz](t_zd, t_zn, val, desc);
2053
2054 tcg_temp_free_ptr(t_zd);
2055 tcg_temp_free_ptr(t_zn);
2056 tcg_temp_free_i32(desc);
2057 }
2058
2059 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2060 {
2061 if (sve_access_check(s)) {
2062 TCGv_i64 t = tcg_temp_new_i64();
2063 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2064 do_insr_i64(s, a, t);
2065 tcg_temp_free_i64(t);
2066 }
2067 return true;
2068 }
2069
2070 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2071 {
2072 if (sve_access_check(s)) {
2073 do_insr_i64(s, a, cpu_reg(s, a->rm));
2074 }
2075 return true;
2076 }
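/* Both INSR forms funnel through do_insr_i64: the helper shifts the
 * elements of Zn up by one position and writes the scalar into
 * element 0.  INSR_f takes the value from the low 64 bits of Vm,
 * INSR_r from the general register Xm.
 */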
2077
2078 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2079 {
2080 static gen_helper_gvec_2 * const fns[4] = {
2081 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2082 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2083 };
2084
2085 if (sve_access_check(s)) {
2086 unsigned vsz = vec_full_reg_size(s);
2087 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2088 vec_full_reg_offset(s, a->rn),
2089 vsz, vsz, 0, fns[a->esz]);
2090 }
2091 return true;
2092 }
2093
2094 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2095 {
2096 static gen_helper_gvec_3 * const fns[4] = {
2097 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2098 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2099 };
2100
2101 if (sve_access_check(s)) {
2102 unsigned vsz = vec_full_reg_size(s);
2103 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2104 vec_full_reg_offset(s, a->rn),
2105 vec_full_reg_offset(s, a->rm),
2106 vsz, vsz, 0, fns[a->esz]);
2107 }
2108 return true;
2109 }
2110
2111 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2112 {
2113 static gen_helper_gvec_2 * const fns[4][2] = {
2114 { NULL, NULL },
2115 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2116 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2117 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2118 };
2119
2120 if (a->esz == 0) {
2121 return false;
2122 }
2123 if (sve_access_check(s)) {
2124 unsigned vsz = vec_full_reg_size(s);
2125 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2126 vec_full_reg_offset(s, a->rn)
2127 + (a->h ? vsz / 2 : 0),
2128 vsz, vsz, 0, fns[a->esz][a->u]);
2129 }
2130 return true;
2131 }
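/* Note the source offset: the high-half (a->h) forms simply point the
 * helper at the second half of Zn, so one helper per element size
 * serves both UNPKLO and UNPKHI, widening vsz/2 input bytes into vsz
 * output bytes.
 */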
2132
2133 /*
2134 *** SVE Permute - Predicates Group
2135 */
2136
2137 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2138 gen_helper_gvec_3 *fn)
2139 {
2140 if (!sve_access_check(s)) {
2141 return true;
2142 }
2143
2144 unsigned vsz = pred_full_reg_size(s);
2145
2146 /* Predicate sizes may be smaller and cannot use simd_desc.
2147 We cannot round up, as we do elsewhere, because we need
2148 the exact size for ZIP2 and REV. We retain the style for
2149 the other helpers for consistency. */
2150 TCGv_ptr t_d = tcg_temp_new_ptr();
2151 TCGv_ptr t_n = tcg_temp_new_ptr();
2152 TCGv_ptr t_m = tcg_temp_new_ptr();
2153 TCGv_i32 t_desc;
2154 int desc;
2155
2156 desc = vsz - 2;
2157 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2158 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2159
2160 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2161 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2162 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2163 t_desc = tcg_const_i32(desc);
2164
2165 fn(t_d, t_n, t_m, t_desc);
2166
2167 tcg_temp_free_ptr(t_d);
2168 tcg_temp_free_ptr(t_n);
2169 tcg_temp_free_ptr(t_m);
2170 tcg_temp_free_i32(t_desc);
2171 return true;
2172 }
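/* A sketch of the descriptor layout built above, assuming the
 * predicate helpers decode it symmetrically:
 *
 *   oprsz    = extract32(desc, 0, SIMD_OPRSZ_BITS) + 2;
 *   esz      = extract32(desc, SIMD_DATA_SHIFT, 2);
 *   high_odd = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
 *
 * Storing vsz - 2 keeps the smallest legal predicate size (2 bytes)
 * encodable as zero.
 */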
2173
2174 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2175 gen_helper_gvec_2 *fn)
2176 {
2177 if (!sve_access_check(s)) {
2178 return true;
2179 }
2180
2181 unsigned vsz = pred_full_reg_size(s);
2182 TCGv_ptr t_d = tcg_temp_new_ptr();
2183 TCGv_ptr t_n = tcg_temp_new_ptr();
2184 TCGv_i32 t_desc;
2185 int desc;
2186
2187 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2188 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2189
2190 /* Predicate sizes may be smaller and cannot use simd_desc.
2191 We cannot round up, as we do elsewhere, because we need
2192 the exact size for ZIP2 and REV. We retain the style for
2193 the other helpers for consistency. */
2194
2195 desc = vsz - 2;
2196 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2197 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2198 t_desc = tcg_const_i32(desc);
2199
2200 fn(t_d, t_n, t_desc);
2201
2202 tcg_temp_free_i32(t_desc);
2203 tcg_temp_free_ptr(t_d);
2204 tcg_temp_free_ptr(t_n);
2205 return true;
2206 }
2207
2208 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2209 {
2210 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2211 }
2212
2213 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2214 {
2215 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2216 }
2217
2218 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2219 {
2220 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2221 }
2222
2223 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2224 {
2225 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2226 }
2227
2228 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229 {
2230 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2231 }
2232
2233 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234 {
2235 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2236 }
2237
2238 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2239 {
2240 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2241 }
2242
2243 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2244 {
2245 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2246 }
2247
2248 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2249 {
2250 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2251 }
2252
2253 /*
2254 *** SVE Permute - Interleaving Group
2255 */
2256
2257 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2258 {
2259 static gen_helper_gvec_3 * const fns[4] = {
2260 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2261 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2262 };
2263
2264 if (sve_access_check(s)) {
2265 unsigned vsz = vec_full_reg_size(s);
2266 unsigned high_ofs = high ? vsz / 2 : 0;
2267 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2268 vec_full_reg_offset(s, a->rn) + high_ofs,
2269 vec_full_reg_offset(s, a->rm) + high_ofs,
2270 vsz, vsz, 0, fns[a->esz]);
2271 }
2272 return true;
2273 }
2274
2275 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2276 gen_helper_gvec_3 *fn)
2277 {
2278 if (sve_access_check(s)) {
2279 unsigned vsz = vec_full_reg_size(s);
2280 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2281 vec_full_reg_offset(s, a->rn),
2282 vec_full_reg_offset(s, a->rm),
2283 vsz, vsz, data, fn);
2284 }
2285 return true;
2286 }
2287
2288 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2289 {
2290 return do_zip(s, a, false);
2291 }
2292
2293 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2294 {
2295 return do_zip(s, a, true);
2296 }
2297
2298 static gen_helper_gvec_3 * const uzp_fns[4] = {
2299 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2300 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2301 };
2302
2303 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2304 {
2305 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2306 }
2307
2308 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309 {
2310 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2311 }
2312
2313 static gen_helper_gvec_3 * const trn_fns[4] = {
2314 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2315 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2316 };
2317
2318 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2319 {
2320 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2321 }
2322
2323 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324 {
2325 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2326 }
2327
2328 /*
2329 *** SVE Permute Vector - Predicated Group
2330 */
2331
2332 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2333 {
2334 static gen_helper_gvec_3 * const fns[4] = {
2335 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2336 };
2337 return do_zpz_ool(s, a, fns[a->esz]);
2338 }
2339
2340 /* Call the helper that computes the ARM LastActiveElement pseudocode
2341 * function, scaled by the element size.  This includes the not-found
2342 * indication; e.g. the not-found value for esz=3 is -8.
2343 */
2344 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2345 {
2346 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2347 * round up, as we do elsewhere, because we need the exact size.
2348 */
2349 TCGv_ptr t_p = tcg_temp_new_ptr();
2350 TCGv_i32 t_desc;
2351 unsigned vsz = pred_full_reg_size(s);
2352 unsigned desc;
2353
2354 desc = vsz - 2;
2355 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2356
2357 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2358 t_desc = tcg_const_i32(desc);
2359
2360 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2361
2362 tcg_temp_free_i32(t_desc);
2363 tcg_temp_free_ptr(t_p);
2364 }
2365
2366 /* Increment LAST to the offset of the next element in the vector,
2367 * wrapping around to 0.
2368 */
2369 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2370 {
2371 unsigned vsz = vec_full_reg_size(s);
2372
2373 tcg_gen_addi_i32(last, last, 1 << esz);
2374 if (is_power_of_2(vsz)) {
2375 tcg_gen_andi_i32(last, last, vsz - 1);
2376 } else {
2377 TCGv_i32 max = tcg_const_i32(vsz);
2378 TCGv_i32 zero = tcg_const_i32(0);
2379 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2380 tcg_temp_free_i32(max);
2381 tcg_temp_free_i32(zero);
2382 }
2383 }
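/* The movcond above is the branchless equivalent of:
 *
 *   last += 1 << esz;
 *   if (last >= vsz) {
 *       last = 0;
 *   }
 *
 * with the unsigned (GEU) compare covering the bound in one test.
 */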
2384
2385 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2386 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2387 {
2388 unsigned vsz = vec_full_reg_size(s);
2389
2390 if (is_power_of_2(vsz)) {
2391 tcg_gen_andi_i32(last, last, vsz - 1);
2392 } else {
2393 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2394 TCGv_i32 zero = tcg_const_i32(0);
2395 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2396 tcg_temp_free_i32(max);
2397 tcg_temp_free_i32(zero);
2398 }
2399 }
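/* Likewise a branchless form of:
 *
 *   if (last < 0) {
 *       last = vsz - (1 << esz);
 *   }
 *
 * i.e. a not-found result wraps around to the last element.
 */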
2400
2401 /* Load an unsigned element of ESZ from BASE+OFS. */
2402 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2403 {
2404 TCGv_i64 r = tcg_temp_new_i64();
2405
2406 switch (esz) {
2407 case 0:
2408 tcg_gen_ld8u_i64(r, base, ofs);
2409 break;
2410 case 1:
2411 tcg_gen_ld16u_i64(r, base, ofs);
2412 break;
2413 case 2:
2414 tcg_gen_ld32u_i64(r, base, ofs);
2415 break;
2416 case 3:
2417 tcg_gen_ld_i64(r, base, ofs);
2418 break;
2419 default:
2420 g_assert_not_reached();
2421 }
2422 return r;
2423 }
2424
2425 /* Load an unsigned element of ESZ from RM[LAST]. */
2426 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2427 int rm, int esz)
2428 {
2429 TCGv_ptr p = tcg_temp_new_ptr();
2430 TCGv_i64 r;
2431
2432 /* Convert the offset within the vector into an offset within ENV.
2433 * The final adjustment for the base of the vector register
2434 * is added via constant offset to the load.
2435 */
2436 #ifdef HOST_WORDS_BIGENDIAN
2437 /* Adjust for element ordering. See vec_reg_offset. */
2438 if (esz < 3) {
2439 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2440 }
2441 #endif
2442 tcg_gen_ext_i32_ptr(p, last);
2443 tcg_gen_add_ptr(p, p, cpu_env);
2444
2445 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2446 tcg_temp_free_ptr(p);
2447
2448 return r;
2449 }
2450
2451 /* Compute CLAST for a Zreg. */
2452 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2453 {
2454 TCGv_i32 last;
2455 TCGLabel *over;
2456 TCGv_i64 ele;
2457 unsigned vsz, esz = a->esz;
2458
2459 if (!sve_access_check(s)) {
2460 return true;
2461 }
2462
2463 last = tcg_temp_local_new_i32();
2464 over = gen_new_label();
2465
2466 find_last_active(s, last, esz, a->pg);
2467
2468 /* There is of course no movcond for a 2048-bit vector,
2469 * so we must branch over the actual store.
2470 */
2471 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2472
2473 if (!before) {
2474 incr_last_active(s, last, esz);
2475 }
2476
2477 ele = load_last_active(s, last, a->rm, esz);
2478 tcg_temp_free_i32(last);
2479
2480 vsz = vec_full_reg_size(s);
2481 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2482 tcg_temp_free_i64(ele);
2483
2484 /* If this insn used MOVPRFX, we may need a second move. */
2485 if (a->rd != a->rn) {
2486 TCGLabel *done = gen_new_label();
2487 tcg_gen_br(done);
2488
2489 gen_set_label(over);
2490 do_mov_z(s, a->rd, a->rn);
2491
2492 gen_set_label(done);
2493 } else {
2494 gen_set_label(over);
2495 }
2496 return true;
2497 }
2498
2499 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2500 {
2501 return do_clast_vector(s, a, false);
2502 }
2503
2504 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2505 {
2506 return do_clast_vector(s, a, true);
2507 }
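/* Architecturally, CLASTB copies the last active element itself while
 * CLASTA copies the element after it, hence the incr_last_active call
 * only for before == false.  With no active elements the destination
 * is instead filled from Zn via the MOVPRFX path above.
 */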
2508
2509 /* Compute CLAST for a scalar. */
2510 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2511 bool before, TCGv_i64 reg_val)
2512 {
2513 TCGv_i32 last = tcg_temp_new_i32();
2514 TCGv_i64 ele, cmp, zero;
2515
2516 find_last_active(s, last, esz, pg);
2517
2518 /* Extend the original value of last prior to incrementing. */
2519 cmp = tcg_temp_new_i64();
2520 tcg_gen_ext_i32_i64(cmp, last);
2521
2522 if (!before) {
2523 incr_last_active(s, last, esz);
2524 }
2525
2526 /* The conceit here is that while last < 0 indicates not found, after
2527 * adjusting for the offset of zregs[rm] within env, it still points
2528 * at valid memory from which we can load garbage.  We then discard
2529 * the garbage with a conditional move.
2530 */
2531 ele = load_last_active(s, last, rm, esz);
2532 tcg_temp_free_i32(last);
2533
2534 zero = tcg_const_i64(0);
2535 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2536
2537 tcg_temp_free_i64(zero);
2538 tcg_temp_free_i64(cmp);
2539 tcg_temp_free_i64(ele);
2540 }
2541
2542 /* Compute CLAST for a Vreg. */
2543 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2544 {
2545 if (sve_access_check(s)) {
2546 int esz = a->esz;
2547 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2548 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2549
2550 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2551 write_fp_dreg(s, a->rd, reg);
2552 tcg_temp_free_i64(reg);
2553 }
2554 return true;
2555 }
2556
2557 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2558 {
2559 return do_clast_fp(s, a, false);
2560 }
2561
2562 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2563 {
2564 return do_clast_fp(s, a, true);
2565 }
2566
2567 /* Compute CLAST for an Xreg. */
2568 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2569 {
2570 TCGv_i64 reg;
2571
2572 if (!sve_access_check(s)) {
2573 return true;
2574 }
2575
2576 reg = cpu_reg(s, a->rd);
2577 switch (a->esz) {
2578 case 0:
2579 tcg_gen_ext8u_i64(reg, reg);
2580 break;
2581 case 1:
2582 tcg_gen_ext16u_i64(reg, reg);
2583 break;
2584 case 2:
2585 tcg_gen_ext32u_i64(reg, reg);
2586 break;
2587 case 3:
2588 break;
2589 default:
2590 g_assert_not_reached();
2591 }
2592
2593 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2594 return true;
2595 }
2596
2597 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2598 {
2599 return do_clast_general(s, a, false);
2600 }
2601
2602 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2603 {
2604 return do_clast_general(s, a, true);
2605 }
2606
2607 /* Compute LAST for a scalar. */
2608 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2609 int pg, int rm, bool before)
2610 {
2611 TCGv_i32 last = tcg_temp_new_i32();
2612 TCGv_i64 ret;
2613
2614 find_last_active(s, last, esz, pg);
2615 if (before) {
2616 wrap_last_active(s, last, esz);
2617 } else {
2618 incr_last_active(s, last, esz);
2619 }
2620
2621 ret = load_last_active(s, last, rm, esz);
2622 tcg_temp_free_i32(last);
2623 return ret;
2624 }
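/* With no active elements, find_last_active returns -(1 << esz):
 * LASTA (before == false) then increments to offset 0 and extracts
 * element 0, while LASTB wraps to the final element, matching the
 * architectural wrap-around of LASTA/LASTB.
 */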
2625
2626 /* Compute LAST for a Vreg. */
2627 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2628 {
2629 if (sve_access_check(s)) {
2630 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2631 write_fp_dreg(s, a->rd, val);
2632 tcg_temp_free_i64(val);
2633 }
2634 return true;
2635 }
2636
2637 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2638 {
2639 return do_last_fp(s, a, false);
2640 }
2641
2642 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2643 {
2644 return do_last_fp(s, a, true);
2645 }
2646
2647 /* Compute LAST for an Xreg. */
2648 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2649 {
2650 if (sve_access_check(s)) {
2651 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2652 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2653 tcg_temp_free_i64(val);
2654 }
2655 return true;
2656 }
2657
2658 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2659 {
2660 return do_last_general(s, a, false);
2661 }
2662
2663 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664 {
2665 return do_last_general(s, a, true);
2666 }
2667
2668 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669 {
2670 if (sve_access_check(s)) {
2671 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2672 }
2673 return true;
2674 }
2675
2676 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2677 {
2678 if (sve_access_check(s)) {
2679 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2680 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2681 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2682 tcg_temp_free_i64(t);
2683 }
2684 return true;
2685 }
2686
2687 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2688 {
2689 static gen_helper_gvec_3 * const fns[4] = {
2690 NULL,
2691 gen_helper_sve_revb_h,
2692 gen_helper_sve_revb_s,
2693 gen_helper_sve_revb_d,
2694 };
2695 return do_zpz_ool(s, a, fns[a->esz]);
2696 }
2697
2698 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2699 {
2700 static gen_helper_gvec_3 * const fns[4] = {
2701 NULL,
2702 NULL,
2703 gen_helper_sve_revh_s,
2704 gen_helper_sve_revh_d,
2705 };
2706 return do_zpz_ool(s, a, fns[a->esz]);
2707 }
2708
2709 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2710 {
2711 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2712 }
2713
2714 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2715 {
2716 static gen_helper_gvec_3 * const fns[4] = {
2717 gen_helper_sve_rbit_b,
2718 gen_helper_sve_rbit_h,
2719 gen_helper_sve_rbit_s,
2720 gen_helper_sve_rbit_d,
2721 };
2722 return do_zpz_ool(s, a, fns[a->esz]);
2723 }
2724
2725 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2726 {
2727 if (sve_access_check(s)) {
2728 unsigned vsz = vec_full_reg_size(s);
2729 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2730 vec_full_reg_offset(s, a->rn),
2731 vec_full_reg_offset(s, a->rm),
2732 pred_full_reg_offset(s, a->pg),
2733 vsz, vsz, a->esz, gen_helper_sve_splice);
2734 }
2735 return true;
2736 }
2737
2738 /*
2739 *** SVE Integer Compare - Vectors Group
2740 */
2741
2742 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2743 gen_helper_gvec_flags_4 *gen_fn)
2744 {
2745 TCGv_ptr pd, zn, zm, pg;
2746 unsigned vsz;
2747 TCGv_i32 t;
2748
2749 if (gen_fn == NULL) {
2750 return false;
2751 }
2752 if (!sve_access_check(s)) {
2753 return true;
2754 }
2755
2756 vsz = vec_full_reg_size(s);
2757 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2758 pd = tcg_temp_new_ptr();
2759 zn = tcg_temp_new_ptr();
2760 zm = tcg_temp_new_ptr();
2761 pg = tcg_temp_new_ptr();
2762
2763 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2764 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2765 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2766 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2767
2768 gen_fn(t, pd, zn, zm, pg, t);
2769
2770 tcg_temp_free_ptr(pd);
2771 tcg_temp_free_ptr(zn);
2772 tcg_temp_free_ptr(zm);
2773 tcg_temp_free_ptr(pg);
2774
2775 do_pred_flags(t);
2776
2777 tcg_temp_free_i32(t);
2778 return true;
2779 }
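/* Note that t does double duty: it carries the simd_desc into the
 * helper and receives the flags result, in the format consumed by
 * do_pred_flags, back through the same register.
 */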
2780
2781 #define DO_PPZZ(NAME, name) \
2782 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2783 uint32_t insn) \
2784 { \
2785 static gen_helper_gvec_flags_4 * const fns[4] = { \
2786 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2787 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2788 }; \
2789 return do_ppzz_flags(s, a, fns[a->esz]); \
2790 }
2791
2792 DO_PPZZ(CMPEQ, cmpeq)
2793 DO_PPZZ(CMPNE, cmpne)
2794 DO_PPZZ(CMPGT, cmpgt)
2795 DO_PPZZ(CMPGE, cmpge)
2796 DO_PPZZ(CMPHI, cmphi)
2797 DO_PPZZ(CMPHS, cmphs)
2798
2799 #undef DO_PPZZ
2800
2801 #define DO_PPZW(NAME, name) \
2802 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2803 uint32_t insn) \
2804 { \
2805 static gen_helper_gvec_flags_4 * const fns[4] = { \
2806 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2807 gen_helper_sve_##name##_ppzw_s, NULL \
2808 }; \
2809 return do_ppzz_flags(s, a, fns[a->esz]); \
2810 }
2811
2812 DO_PPZW(CMPEQ, cmpeq)
2813 DO_PPZW(CMPNE, cmpne)
2814 DO_PPZW(CMPGT, cmpgt)
2815 DO_PPZW(CMPGE, cmpge)
2816 DO_PPZW(CMPHI, cmphi)
2817 DO_PPZW(CMPHS, cmphs)
2818 DO_PPZW(CMPLT, cmplt)
2819 DO_PPZW(CMPLE, cmple)
2820 DO_PPZW(CMPLO, cmplo)
2821 DO_PPZW(CMPLS, cmpls)
2822
2823 #undef DO_PPZW
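/* Only six ppzz comparisons need helpers: LT/LE/LO/LS against a
 * vector are obtained by swapping the operands of GT/GE/HI/HS when
 * decoding.  The wide (ppzw) forms compare each element against a
 * 64-bit element of Zm, which does not commute, so all ten require
 * distinct helpers.
 */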
2824
2825 /*
2826 *** SVE Integer Compare - Immediate Groups
2827 */
2828
2829 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2830 gen_helper_gvec_flags_3 *gen_fn)
2831 {
2832 TCGv_ptr pd, zn, pg;
2833 unsigned vsz;
2834 TCGv_i32 t;
2835
2836 if (gen_fn == NULL) {
2837 return false;
2838 }
2839 if (!sve_access_check(s)) {
2840 return true;
2841 }
2842
2843 vsz = vec_full_reg_size(s);
2844 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2845 pd = tcg_temp_new_ptr();
2846 zn = tcg_temp_new_ptr();
2847 pg = tcg_temp_new_ptr();
2848
2849 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2850 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2851 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2852
2853 gen_fn(t, pd, zn, pg, t);
2854
2855 tcg_temp_free_ptr(pd);
2856 tcg_temp_free_ptr(zn);
2857 tcg_temp_free_ptr(pg);
2858
2859 do_pred_flags(t);
2860
2861 tcg_temp_free_i32(t);
2862 return true;
2863 }
2864
2865 #define DO_PPZI(NAME, name) \
2866 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2867 uint32_t insn) \
2868 { \
2869 static gen_helper_gvec_flags_3 * const fns[4] = { \
2870 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2871 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2872 }; \
2873 return do_ppzi_flags(s, a, fns[a->esz]); \
2874 }
2875
2876 DO_PPZI(CMPEQ, cmpeq)
2877 DO_PPZI(CMPNE, cmpne)
2878 DO_PPZI(CMPGT, cmpgt)
2879 DO_PPZI(CMPGE, cmpge)
2880 DO_PPZI(CMPHI, cmphi)
2881 DO_PPZI(CMPHS, cmphs)
2882 DO_PPZI(CMPLT, cmplt)
2883 DO_PPZI(CMPLE, cmple)
2884 DO_PPZI(CMPLO, cmplo)
2885 DO_PPZI(CMPLS, cmpls)
2886
2887 #undef DO_PPZI
2888
2889 /*
2890 *** SVE Partition Break Group
2891 */
2892
2893 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2894 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2895 {
2896 if (!sve_access_check(s)) {
2897 return true;
2898 }
2899
2900 unsigned vsz = pred_full_reg_size(s);
2901
2902 /* Predicate sizes may be smaller and cannot use simd_desc. */
2903 TCGv_ptr d = tcg_temp_new_ptr();
2904 TCGv_ptr n = tcg_temp_new_ptr();
2905 TCGv_ptr m = tcg_temp_new_ptr();
2906 TCGv_ptr g = tcg_temp_new_ptr();
2907 TCGv_i32 t = tcg_const_i32(vsz - 2);
2908
2909 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2910 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2911 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2912 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2913
2914 if (a->s) {
2915 fn_s(t, d, n, m, g, t);
2916 do_pred_flags(t);
2917 } else {
2918 fn(d, n, m, g, t);
2919 }
2920 tcg_temp_free_ptr(d);
2921 tcg_temp_free_ptr(n);
2922 tcg_temp_free_ptr(m);
2923 tcg_temp_free_ptr(g);
2924 tcg_temp_free_i32(t);
2925 return true;
2926 }
2927
2928 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2929 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2930 {
2931 if (!sve_access_check(s)) {
2932 return true;
2933 }
2934
2935 unsigned vsz = pred_full_reg_size(s);
2936
2937 /* Predicate sizes may be smaller and cannot use simd_desc. */
2938 TCGv_ptr d = tcg_temp_new_ptr();
2939 TCGv_ptr n = tcg_temp_new_ptr();
2940 TCGv_ptr g = tcg_temp_new_ptr();
2941 TCGv_i32 t = tcg_const_i32(vsz - 2);
2942
2943 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2944 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2945 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2946
2947 if (a->s) {
2948 fn_s(t, d, n, g, t);
2949 do_pred_flags(t);
2950 } else {
2951 fn(d, n, g, t);
2952 }
2953 tcg_temp_free_ptr(d);
2954 tcg_temp_free_ptr(n);
2955 tcg_temp_free_ptr(g);
2956 tcg_temp_free_i32(t);
2957 return true;
2958 }
2959
2960 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2961 {
2962 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2963 }
2964
2965 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2966 {
2967 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2968 }
2969
2970 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2971 {
2972 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2973 }
2974
2975 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2976 {
2977 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2978 }
2979
2980 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2981 {
2982 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2983 }
2984
2985 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2986 {
2987 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2988 }
2989
2990 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2991 {
2992 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2993 }
2994
2995 /*
2996 *** SVE Predicate Count Group
2997 */
2998
2999 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3000 {
3001 unsigned psz = pred_full_reg_size(s);
3002
3003 if (psz <= 8) {
3004 uint64_t psz_mask;
3005
3006 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3007 if (pn != pg) {
3008 TCGv_i64 g = tcg_temp_new_i64();
3009 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3010 tcg_gen_and_i64(val, val, g);
3011 tcg_temp_free_i64(g);
3012 }
3013
3014 /* Narrow the pred_esz_masks value to the predicate size,
3015 * simply to reduce the size of the code generated here.
3016 */
3017 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3018 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3019
3020 tcg_gen_ctpop_i64(val, val);
3021 } else {
3022 TCGv_ptr t_pn = tcg_temp_new_ptr();
3023 TCGv_ptr t_pg = tcg_temp_new_ptr();
3024 unsigned desc;
3025 TCGv_i32 t_desc;
3026
3027 desc = psz - 2;
3028 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3029
3030 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3031 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3032 t_desc = tcg_const_i32(desc);
3033
3034 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3035 tcg_temp_free_ptr(t_pn);
3036 tcg_temp_free_ptr(t_pg);
3037 tcg_temp_free_i32(t_desc);
3038 }
3039 }
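/* Fast-path example: with a 256-bit vector, psz == 4 and the whole
 * predicate fits in one i64 load.  For esz == 2 the constant becomes
 * pred_esz_masks[2] & MAKE_64BIT_MASK(0, 32) == 0x11111111, keeping
 * one flag bit per 4-byte element, and ctpop then yields the count
 * directly.
 */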
3040
3041 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3042 {
3043 if (sve_access_check(s)) {
3044 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3045 }
3046 return true;
3047 }
3048
3049 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3050 uint32_t insn)
3051 {
3052 if (sve_access_check(s)) {
3053 TCGv_i64 reg = cpu_reg(s, a->rd);
3054 TCGv_i64 val = tcg_temp_new_i64();
3055
3056 do_cntp(s, val, a->esz, a->pg, a->pg);
3057 if (a->d) {
3058 tcg_gen_sub_i64(reg, reg, val);
3059 } else {
3060 tcg_gen_add_i64(reg, reg, val);
3061 }
3062 tcg_temp_free_i64(val);
3063 }
3064 return true;
3065 }
3066
3067 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3068 uint32_t insn)
3069 {
3070 if (a->esz == 0) {
3071 return false;
3072 }
3073 if (sve_access_check(s)) {
3074 unsigned vsz = vec_full_reg_size(s);
3075 TCGv_i64 val = tcg_temp_new_i64();
3076 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3077
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3080 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3081 }
3082 return true;
3083 }
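/* The element count is broadcast into every lane: e.g. INCP z0.s, p0
 * adds the number of active 32-bit elements in p0 to each word of
 * z0, via the scalar-broadcast tcg_gen_gvec_adds/subs expanders.
 */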
3084
3085 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3086 uint32_t insn)
3087 {
3088 if (sve_access_check(s)) {
3089 TCGv_i64 reg = cpu_reg(s, a->rd);
3090 TCGv_i64 val = tcg_temp_new_i64();
3091
3092 do_cntp(s, val, a->esz, a->pg, a->pg);
3093 do_sat_addsub_32(reg, val, a->u, a->d);
3094 }
3095 return true;
3096 }
3097
3098 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3099 uint32_t insn)
3100 {
3101 if (sve_access_check(s)) {
3102 TCGv_i64 reg = cpu_reg(s, a->rd);
3103 TCGv_i64 val = tcg_temp_new_i64();
3104
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_64(reg, val, a->u, a->d);
3107 }
3108 return true;
3109 }
3110
3111 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3112 uint32_t insn)
3113 {
3114 if (a->esz == 0) {
3115 return false;
3116 }
3117 if (sve_access_check(s)) {
3118 TCGv_i64 val = tcg_temp_new_i64();
3119 do_cntp(s, val, a->esz, a->pg, a->pg);
3120 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3121 }
3122 return true;
3123 }
3124
3125 /*
3126 *** SVE Integer Compare Scalars Group
3127 */
3128
3129 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3130 {
3131 if (!sve_access_check(s)) {
3132 return true;
3133 }
3134
3135 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3136 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3137 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3138 TCGv_i64 cmp = tcg_temp_new_i64();
3139
3140 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3141 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3142 tcg_temp_free_i64(cmp);
3143
3144 /* VF = !NF & !CF. */
3145 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3146 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3147
3148 /* Both NF and VF actually look at bit 31. */
3149 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3150 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3151 return true;
3152 }
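/* This matches the architectural definition of CTERMEQ/CTERMNE:
 * N is set to the comparison result, V to !N & !C, and C and Z are
 * left untouched, so the subsequent conditional branch of the loop
 * can test for termination.
 */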
3153
3154 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3155 {
3156 if (!sve_access_check(s)) {
3157 return true;
3158 }
3159
3160 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3161 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3162 TCGv_i64 t0 = tcg_temp_new_i64();
3163 TCGv_i64 t1 = tcg_temp_new_i64();
3164 TCGv_i32 t2, t3;
3165 TCGv_ptr ptr;
3166 unsigned desc, vsz = vec_full_reg_size(s);
3167 TCGCond cond;
3168
3169 if (!a->sf) {
3170 if (a->u) {
3171 tcg_gen_ext32u_i64(op0, op0);
3172 tcg_gen_ext32u_i64(op1, op1);
3173 } else {
3174 tcg_gen_ext32s_i64(op0, op0);
3175 tcg_gen_ext32s_i64(op1, op1);
3176 }
3177 }
3178
3179 /* For the helper, compress the different conditions into a computation
3180 * of the number of iterations for which the condition is true.
3181 *
3182 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3183 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3184 * aren't that large, so any value >= predicate size is sufficient.
3185 */