target/arm: Implement SVE gather loads
qemu.git: target/arm/translate-sve.c
1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35
36
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
39
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
41 TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
44
45 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
46 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
48
49 /*
50 * Helpers for extracting complex instruction fields.
51 */
52
53 /* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
55 */
56 static int tszimm_esz(int x)
57 {
58 x >>= 3; /* discard imm3 */
59 return 31 - clz32(x);
60 }
61
62 static int tszimm_shr(int x)
63 {
64 return (16 << tszimm_esz(x)) - x;
65 }
66
67 /* See e.g. LSL (immediate, predicated). */
68 static int tszimm_shl(int x)
69 {
70 return x - (8 << tszimm_esz(x));
71 }
72
73 static inline int plus1(int x)
74 {
75 return x + 1;
76 }
77
78 /* The SH bit is in bit 8. Extract the low 8 and shift. */
79 static inline int expand_imm_sh8s(int x)
80 {
81 return (int8_t)x << (x & 0x100 ? 8 : 0);
82 }
83
84 static inline int expand_imm_sh8u(int x)
85 {
86 return (uint8_t)x << (x & 0x100 ? 8 : 0);
87 }
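/* Worked example: x = 0x1ff has the SH bit set and a low byte of 0xff,
 * so expand_imm_sh8u yields 0xff00 while expand_imm_sh8s sign-extends
 * the byte first and yields -256.
 */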
88
89 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
90 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
91 */
92 static inline int msz_dtype(int msz)
93 {
94 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
95 return dtype[msz];
96 }
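/* That is, msz = 0 (bytes) maps to dtype 0, msz = 1 (halfwords) to 5,
 * msz = 2 (words) to 10, and msz = 3 (doublewords) to 15: the dtype
 * encodings for which the memory element size equals the vector
 * element size, with no sign extension.
 */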
97
98 /*
99 * Include the generated decoder.
100 */
101
102 #include "decode-sve.inc.c"
103
104 /*
105 * Implement all of the translator functions referenced by the decoder.
106 */
107
108 /* Return the offset into CPUARMState of the predicate vector register Pn.
109 * Note for this purpose, FFR is P16.
110 */
111 static inline int pred_full_reg_offset(DisasContext *s, int regno)
112 {
113 return offsetof(CPUARMState, vfp.pregs[regno]);
114 }
115
116 /* Return the byte size of the whole predicate register, VL / 64. */
117 static inline int pred_full_reg_size(DisasContext *s)
118 {
119 return s->sve_len >> 3;
120 }
121
122 /* Round up the size of a register to a size allowed by
123 * the tcg vector infrastructure. Any operation which uses this
124 * size may assume that the bits above pred_full_reg_size are zero,
125 * and must leave them the same way.
126 *
127 * Note that this is not needed for the vector registers as they
128 * are always properly sized for tcg vectors.
129 */
130 static int size_for_gvec(int size)
131 {
132 if (size <= 8) {
133 return 8;
134 } else {
135 return QEMU_ALIGN_UP(size, 16);
136 }
137 }
138
139 static int pred_gvec_reg_size(DisasContext *s)
140 {
141 return size_for_gvec(pred_full_reg_size(s));
142 }
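/* For example, a 256-bit vector has a 4-byte predicate register,
 * which size_for_gvec rounds up to 8 bytes; a 1536-bit vector has a
 * 24-byte predicate register, rounded up to 32 bytes.
 */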
143
144 /* Invoke a vector expander on two Zregs. */
145 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
146 int esz, int rd, int rn)
147 {
148 if (sve_access_check(s)) {
149 unsigned vsz = vec_full_reg_size(s);
150 gvec_fn(esz, vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn), vsz, vsz);
152 }
153 return true;
154 }
155
156 /* Invoke a vector expander on three Zregs. */
157 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
158 int esz, int rd, int rn, int rm)
159 {
160 if (sve_access_check(s)) {
161 unsigned vsz = vec_full_reg_size(s);
162 gvec_fn(esz, vec_full_reg_offset(s, rd),
163 vec_full_reg_offset(s, rn),
164 vec_full_reg_offset(s, rm), vsz, vsz);
165 }
166 return true;
167 }
168
169 /* Invoke a vector move on two Zregs. */
170 static bool do_mov_z(DisasContext *s, int rd, int rn)
171 {
172 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
173 }
174
175 /* Initialize a Zreg with replications of a 64-bit immediate. */
176 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
177 {
178 unsigned vsz = vec_full_reg_size(s);
179 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
180 }
181
182 /* Invoke a vector expander on two Pregs. */
183 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
184 int esz, int rd, int rn)
185 {
186 if (sve_access_check(s)) {
187 unsigned psz = pred_gvec_reg_size(s);
188 gvec_fn(esz, pred_full_reg_offset(s, rd),
189 pred_full_reg_offset(s, rn), psz, psz);
190 }
191 return true;
192 }
193
194 /* Invoke a vector expander on three Pregs. */
195 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
196 int esz, int rd, int rn, int rm)
197 {
198 if (sve_access_check(s)) {
199 unsigned psz = pred_gvec_reg_size(s);
200 gvec_fn(esz, pred_full_reg_offset(s, rd),
201 pred_full_reg_offset(s, rn),
202 pred_full_reg_offset(s, rm), psz, psz);
203 }
204 return true;
205 }
206
207 /* Invoke a vector operation on four Pregs. */
208 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
209 int rd, int rn, int rm, int rg)
210 {
211 if (sve_access_check(s)) {
212 unsigned psz = pred_gvec_reg_size(s);
213 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
214 pred_full_reg_offset(s, rn),
215 pred_full_reg_offset(s, rm),
216 pred_full_reg_offset(s, rg),
217 psz, psz, gvec_op);
218 }
219 return true;
220 }
221
222 /* Invoke a vector move on two Pregs. */
223 static bool do_mov_p(DisasContext *s, int rd, int rn)
224 {
225 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
226 }
227
228 /* Set the cpu flags as per a return from an SVE helper. */
229 static void do_pred_flags(TCGv_i32 t)
230 {
231 tcg_gen_mov_i32(cpu_NF, t);
232 tcg_gen_andi_i32(cpu_ZF, t, 2);
233 tcg_gen_andi_i32(cpu_CF, t, 1);
234 tcg_gen_movi_i32(cpu_VF, 0);
235 }
236
237 /* Subroutines computing the ARM PredTest pseudofunction. */
238 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
239 {
240 TCGv_i32 t = tcg_temp_new_i32();
241
242 gen_helper_sve_predtest1(t, d, g);
243 do_pred_flags(t);
244 tcg_temp_free_i32(t);
245 }
246
247 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
248 {
249 TCGv_ptr dptr = tcg_temp_new_ptr();
250 TCGv_ptr gptr = tcg_temp_new_ptr();
251 TCGv_i32 t;
252
253 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
254 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
255 t = tcg_const_i32(words);
256
257 gen_helper_sve_predtest(t, dptr, gptr, t);
258 tcg_temp_free_ptr(dptr);
259 tcg_temp_free_ptr(gptr);
260
261 do_pred_flags(t);
262 tcg_temp_free_i32(t);
263 }
264
265 /* For each element size, the bits within a predicate word that are active. */
266 const uint64_t pred_esz_masks[4] = {
267 0xffffffffffffffffull, 0x5555555555555555ull,
268 0x1111111111111111ull, 0x0101010101010101ull
269 };
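/* For example, with 32-bit elements each element owns four predicate
 * bits but only the least significant of them matters, hence the
 * 0x1111111111111111 mask for esz = 2.
 */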
270
271 /*
272 *** SVE Logical - Unpredicated Group
273 */
274
275 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
276 {
277 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
278 }
279
280 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
281 {
282 if (a->rn == a->rm) { /* MOV */
283 return do_mov_z(s, a->rd, a->rn);
284 } else {
285 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
286 }
287 }
288
289 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
290 {
291 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
292 }
293
294 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
295 {
296 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
297 }
298
299 /*
300 *** SVE Integer Arithmetic - Unpredicated Group
301 */
302
303 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
304 {
305 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
306 }
307
308 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
309 {
310 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
311 }
312
313 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
314 {
315 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
316 }
317
318 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
319 {
320 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
321 }
322
323 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
324 {
325 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
326 }
327
328 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
329 {
330 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
331 }
332
333 /*
334 *** SVE Integer Arithmetic - Binary Predicated Group
335 */
336
337 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
338 {
339 unsigned vsz = vec_full_reg_size(s);
340 if (fn == NULL) {
341 return false;
342 }
343 if (sve_access_check(s)) {
344 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
345 vec_full_reg_offset(s, a->rn),
346 vec_full_reg_offset(s, a->rm),
347 pred_full_reg_offset(s, a->pg),
348 vsz, vsz, 0, fn);
349 }
350 return true;
351 }
352
353 #define DO_ZPZZ(NAME, name) \
354 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
355 uint32_t insn) \
356 { \
357 static gen_helper_gvec_4 * const fns[4] = { \
358 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
359 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
360 }; \
361 return do_zpzz_ool(s, a, fns[a->esz]); \
362 }
363
364 DO_ZPZZ(AND, and)
365 DO_ZPZZ(EOR, eor)
366 DO_ZPZZ(ORR, orr)
367 DO_ZPZZ(BIC, bic)
368
369 DO_ZPZZ(ADD, add)
370 DO_ZPZZ(SUB, sub)
371
372 DO_ZPZZ(SMAX, smax)
373 DO_ZPZZ(UMAX, umax)
374 DO_ZPZZ(SMIN, smin)
375 DO_ZPZZ(UMIN, umin)
376 DO_ZPZZ(SABD, sabd)
377 DO_ZPZZ(UABD, uabd)
378
379 DO_ZPZZ(MUL, mul)
380 DO_ZPZZ(SMULH, smulh)
381 DO_ZPZZ(UMULH, umulh)
382
383 DO_ZPZZ(ASR, asr)
384 DO_ZPZZ(LSR, lsr)
385 DO_ZPZZ(LSL, lsl)
386
387 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
388 {
389 static gen_helper_gvec_4 * const fns[4] = {
390 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
391 };
392 return do_zpzz_ool(s, a, fns[a->esz]);
393 }
394
395 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
396 {
397 static gen_helper_gvec_4 * const fns[4] = {
398 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
399 };
400 return do_zpzz_ool(s, a, fns[a->esz]);
401 }
402
403 DO_ZPZZ(SEL, sel)
404
405 #undef DO_ZPZZ
406
407 /*
408 *** SVE Integer Arithmetic - Unary Predicated Group
409 */
410
411 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
412 {
413 if (fn == NULL) {
414 return false;
415 }
416 if (sve_access_check(s)) {
417 unsigned vsz = vec_full_reg_size(s);
418 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
419 vec_full_reg_offset(s, a->rn),
420 pred_full_reg_offset(s, a->pg),
421 vsz, vsz, 0, fn);
422 }
423 return true;
424 }
425
426 #define DO_ZPZ(NAME, name) \
427 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
428 { \
429 static gen_helper_gvec_3 * const fns[4] = { \
430 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
431 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
432 }; \
433 return do_zpz_ool(s, a, fns[a->esz]); \
434 }
435
436 DO_ZPZ(CLS, cls)
437 DO_ZPZ(CLZ, clz)
438 DO_ZPZ(CNT_zpz, cnt_zpz)
439 DO_ZPZ(CNOT, cnot)
440 DO_ZPZ(NOT_zpz, not_zpz)
441 DO_ZPZ(ABS, abs)
442 DO_ZPZ(NEG, neg)
443
444 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
445 {
446 static gen_helper_gvec_3 * const fns[4] = {
447 NULL,
448 gen_helper_sve_fabs_h,
449 gen_helper_sve_fabs_s,
450 gen_helper_sve_fabs_d
451 };
452 return do_zpz_ool(s, a, fns[a->esz]);
453 }
454
455 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
456 {
457 static gen_helper_gvec_3 * const fns[4] = {
458 NULL,
459 gen_helper_sve_fneg_h,
460 gen_helper_sve_fneg_s,
461 gen_helper_sve_fneg_d
462 };
463 return do_zpz_ool(s, a, fns[a->esz]);
464 }
465
466 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
467 {
468 static gen_helper_gvec_3 * const fns[4] = {
469 NULL,
470 gen_helper_sve_sxtb_h,
471 gen_helper_sve_sxtb_s,
472 gen_helper_sve_sxtb_d
473 };
474 return do_zpz_ool(s, a, fns[a->esz]);
475 }
476
477 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
478 {
479 static gen_helper_gvec_3 * const fns[4] = {
480 NULL,
481 gen_helper_sve_uxtb_h,
482 gen_helper_sve_uxtb_s,
483 gen_helper_sve_uxtb_d
484 };
485 return do_zpz_ool(s, a, fns[a->esz]);
486 }
487
488 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
489 {
490 static gen_helper_gvec_3 * const fns[4] = {
491 NULL, NULL,
492 gen_helper_sve_sxth_s,
493 gen_helper_sve_sxth_d
494 };
495 return do_zpz_ool(s, a, fns[a->esz]);
496 }
497
498 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
499 {
500 static gen_helper_gvec_3 * const fns[4] = {
501 NULL, NULL,
502 gen_helper_sve_uxth_s,
503 gen_helper_sve_uxth_d
504 };
505 return do_zpz_ool(s, a, fns[a->esz]);
506 }
507
508 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
509 {
510 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
511 }
512
513 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
514 {
515 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
516 }
517
518 #undef DO_ZPZ
519
520 /*
521 *** SVE Integer Reduction Group
522 */
523
524 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
525 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
526 gen_helper_gvec_reduc *fn)
527 {
528 unsigned vsz = vec_full_reg_size(s);
529 TCGv_ptr t_zn, t_pg;
530 TCGv_i32 desc;
531 TCGv_i64 temp;
532
533 if (fn == NULL) {
534 return false;
535 }
536 if (!sve_access_check(s)) {
537 return true;
538 }
539
540 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
541 temp = tcg_temp_new_i64();
542 t_zn = tcg_temp_new_ptr();
543 t_pg = tcg_temp_new_ptr();
544
545 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
546 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
547 fn(temp, t_zn, t_pg, desc);
548 tcg_temp_free_ptr(t_zn);
549 tcg_temp_free_ptr(t_pg);
550 tcg_temp_free_i32(desc);
551
552 write_fp_dreg(s, a->rd, temp);
553 tcg_temp_free_i64(temp);
554 return true;
555 }
556
557 #define DO_VPZ(NAME, name) \
558 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
559 { \
560 static gen_helper_gvec_reduc * const fns[4] = { \
561 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
562 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
563 }; \
564 return do_vpz_ool(s, a, fns[a->esz]); \
565 }
566
567 DO_VPZ(ORV, orv)
568 DO_VPZ(ANDV, andv)
569 DO_VPZ(EORV, eorv)
570
571 DO_VPZ(UADDV, uaddv)
572 DO_VPZ(SMAXV, smaxv)
573 DO_VPZ(UMAXV, umaxv)
574 DO_VPZ(SMINV, sminv)
575 DO_VPZ(UMINV, uminv)
576
577 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
578 {
579 static gen_helper_gvec_reduc * const fns[4] = {
580 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
581 gen_helper_sve_saddv_s, NULL
582 };
583 return do_vpz_ool(s, a, fns[a->esz]);
584 }
585
586 #undef DO_VPZ
587
588 /*
589 *** SVE Shift by Immediate - Predicated Group
590 */
591
592 /* Store zero into every active element of Zd. We will use this for two
593 * and three-operand predicated instructions for which logic dictates a
594 * zero result.
595 */
596 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
597 {
598 static gen_helper_gvec_2 * const fns[4] = {
599 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
600 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
601 };
602 if (sve_access_check(s)) {
603 unsigned vsz = vec_full_reg_size(s);
604 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
605 pred_full_reg_offset(s, pg),
606 vsz, vsz, 0, fns[esz]);
607 }
608 return true;
609 }
610
611 /* Copy Zn into Zd, storing zeros into inactive elements. */
612 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
613 {
614 static gen_helper_gvec_3 * const fns[4] = {
615 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
616 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
617 };
618 unsigned vsz = vec_full_reg_size(s);
619 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
620 vec_full_reg_offset(s, rn),
621 pred_full_reg_offset(s, pg),
622 vsz, vsz, 0, fns[esz]);
623 }
624
625 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
626 gen_helper_gvec_3 *fn)
627 {
628 if (sve_access_check(s)) {
629 unsigned vsz = vec_full_reg_size(s);
630 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
631 vec_full_reg_offset(s, a->rn),
632 pred_full_reg_offset(s, a->pg),
633 vsz, vsz, a->imm, fn);
634 }
635 return true;
636 }
637
638 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
639 {
640 static gen_helper_gvec_3 * const fns[4] = {
641 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
642 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
643 };
644 if (a->esz < 0) {
645 /* Invalid tsz encoding -- see tszimm_esz. */
646 return false;
647 }
648 /* Shift by element size is architecturally valid. For
649 arithmetic right-shift, it's the same as by one less. */
650 a->imm = MIN(a->imm, (8 << a->esz) - 1);
651 return do_zpzi_ool(s, a, fns[a->esz]);
652 }
653
654 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
655 {
656 static gen_helper_gvec_3 * const fns[4] = {
657 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
658 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
659 };
660 if (a->esz < 0) {
661 return false;
662 }
663 /* Shift by element size is architecturally valid.
664 For logical shifts, it is a zeroing operation. */
665 if (a->imm >= (8 << a->esz)) {
666 return do_clr_zp(s, a->rd, a->pg, a->esz);
667 } else {
668 return do_zpzi_ool(s, a, fns[a->esz]);
669 }
670 }
671
672 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
673 {
674 static gen_helper_gvec_3 * const fns[4] = {
675 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
676 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
677 };
678 if (a->esz < 0) {
679 return false;
680 }
681 /* Shift by element size is architecturally valid.
682 For logical shifts, it is a zeroing operation. */
683 if (a->imm >= (8 << a->esz)) {
684 return do_clr_zp(s, a->rd, a->pg, a->esz);
685 } else {
686 return do_zpzi_ool(s, a, fns[a->esz]);
687 }
688 }
689
690 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
691 {
692 static gen_helper_gvec_3 * const fns[4] = {
693 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
694 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
695 };
696 if (a->esz < 0) {
697 return false;
698 }
699 /* Shift by element size is architecturally valid. For arithmetic
700 right shift for division, it is a zeroing operation. */
701 if (a->imm >= (8 << a->esz)) {
702 return do_clr_zp(s, a->rd, a->pg, a->esz);
703 } else {
704 return do_zpzi_ool(s, a, fns[a->esz]);
705 }
706 }
707
708 /*
709 *** SVE Bitwise Shift - Predicated Group
710 */
711
712 #define DO_ZPZW(NAME, name) \
713 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
714 uint32_t insn) \
715 { \
716 static gen_helper_gvec_4 * const fns[3] = { \
717 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
718 gen_helper_sve_##name##_zpzw_s, \
719 }; \
720 if (a->esz < 0 || a->esz >= 3) { \
721 return false; \
722 } \
723 return do_zpzz_ool(s, a, fns[a->esz]); \
724 }
725
726 DO_ZPZW(ASR, asr)
727 DO_ZPZW(LSR, lsr)
728 DO_ZPZW(LSL, lsl)
729
730 #undef DO_ZPZW
731
732 /*
733 *** SVE Bitwise Shift - Unpredicated Group
734 */
735
736 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
737 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
738 int64_t, uint32_t, uint32_t))
739 {
740 if (a->esz < 0) {
741 /* Invalid tsz encoding -- see tszimm_esz. */
742 return false;
743 }
744 if (sve_access_check(s)) {
745 unsigned vsz = vec_full_reg_size(s);
746 /* Shift by element size is architecturally valid. For
747 arithmetic right-shift, it's the same as by one less.
748 Otherwise it is a zeroing operation. */
749 if (a->imm >= 8 << a->esz) {
750 if (asr) {
751 a->imm = (8 << a->esz) - 1;
752 } else {
753 do_dupi_z(s, a->rd, 0);
754 return true;
755 }
756 }
757 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
758 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
759 }
760 return true;
761 }
762
763 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
764 {
765 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
766 }
767
768 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
769 {
770 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
771 }
772
773 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
774 {
775 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
776 }
777
778 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
779 {
780 if (fn == NULL) {
781 return false;
782 }
783 if (sve_access_check(s)) {
784 unsigned vsz = vec_full_reg_size(s);
785 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
786 vec_full_reg_offset(s, a->rn),
787 vec_full_reg_offset(s, a->rm),
788 vsz, vsz, 0, fn);
789 }
790 return true;
791 }
792
793 #define DO_ZZW(NAME, name) \
794 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
795 uint32_t insn) \
796 { \
797 static gen_helper_gvec_3 * const fns[4] = { \
798 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
799 gen_helper_sve_##name##_zzw_s, NULL \
800 }; \
801 return do_zzw_ool(s, a, fns[a->esz]); \
802 }
803
804 DO_ZZW(ASR, asr)
805 DO_ZZW(LSR, lsr)
806 DO_ZZW(LSL, lsl)
807
808 #undef DO_ZZW
809
810 /*
811 *** SVE Integer Multiply-Add Group
812 */
813
814 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
815 gen_helper_gvec_5 *fn)
816 {
817 if (sve_access_check(s)) {
818 unsigned vsz = vec_full_reg_size(s);
819 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
820 vec_full_reg_offset(s, a->ra),
821 vec_full_reg_offset(s, a->rn),
822 vec_full_reg_offset(s, a->rm),
823 pred_full_reg_offset(s, a->pg),
824 vsz, vsz, 0, fn);
825 }
826 return true;
827 }
828
829 #define DO_ZPZZZ(NAME, name) \
830 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
831 { \
832 static gen_helper_gvec_5 * const fns[4] = { \
833 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
834 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
835 }; \
836 return do_zpzzz_ool(s, a, fns[a->esz]); \
837 }
838
839 DO_ZPZZZ(MLA, mla)
840 DO_ZPZZZ(MLS, mls)
841
842 #undef DO_ZPZZZ
843
844 /*
845 *** SVE Index Generation Group
846 */
847
848 static void do_index(DisasContext *s, int esz, int rd,
849 TCGv_i64 start, TCGv_i64 incr)
850 {
851 unsigned vsz = vec_full_reg_size(s);
852 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
853 TCGv_ptr t_zd = tcg_temp_new_ptr();
854
855 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
856 if (esz == 3) {
857 gen_helper_sve_index_d(t_zd, start, incr, desc);
858 } else {
859 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
860 static index_fn * const fns[3] = {
861 gen_helper_sve_index_b,
862 gen_helper_sve_index_h,
863 gen_helper_sve_index_s,
864 };
865 TCGv_i32 s32 = tcg_temp_new_i32();
866 TCGv_i32 i32 = tcg_temp_new_i32();
867
868 tcg_gen_extrl_i64_i32(s32, start);
869 tcg_gen_extrl_i64_i32(i32, incr);
870 fns[esz](t_zd, s32, i32, desc);
871
872 tcg_temp_free_i32(s32);
873 tcg_temp_free_i32(i32);
874 }
875 tcg_temp_free_ptr(t_zd);
876 tcg_temp_free_i32(desc);
877 }
878
879 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
880 {
881 if (sve_access_check(s)) {
882 TCGv_i64 start = tcg_const_i64(a->imm1);
883 TCGv_i64 incr = tcg_const_i64(a->imm2);
884 do_index(s, a->esz, a->rd, start, incr);
885 tcg_temp_free_i64(start);
886 tcg_temp_free_i64(incr);
887 }
888 return true;
889 }
890
891 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
892 {
893 if (sve_access_check(s)) {
894 TCGv_i64 start = tcg_const_i64(a->imm);
895 TCGv_i64 incr = cpu_reg(s, a->rm);
896 do_index(s, a->esz, a->rd, start, incr);
897 tcg_temp_free_i64(start);
898 }
899 return true;
900 }
901
902 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
903 {
904 if (sve_access_check(s)) {
905 TCGv_i64 start = cpu_reg(s, a->rn);
906 TCGv_i64 incr = tcg_const_i64(a->imm);
907 do_index(s, a->esz, a->rd, start, incr);
908 tcg_temp_free_i64(incr);
909 }
910 return true;
911 }
912
913 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
914 {
915 if (sve_access_check(s)) {
916 TCGv_i64 start = cpu_reg(s, a->rn);
917 TCGv_i64 incr = cpu_reg(s, a->rm);
918 do_index(s, a->esz, a->rd, start, incr);
919 }
920 return true;
921 }
922
923 /*
924 *** SVE Stack Allocation Group
925 */
926
927 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
928 {
929 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
930 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
931 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
932 return true;
933 }
934
935 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
936 {
937 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
938 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
939 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
940 return true;
941 }
942
943 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
944 {
945 TCGv_i64 reg = cpu_reg(s, a->rd);
946 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
947 return true;
948 }
949
950 /*
951 *** SVE Compute Vector Address Group
952 */
953
954 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
955 {
956 if (sve_access_check(s)) {
957 unsigned vsz = vec_full_reg_size(s);
958 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
959 vec_full_reg_offset(s, a->rn),
960 vec_full_reg_offset(s, a->rm),
961 vsz, vsz, a->imm, fn);
962 }
963 return true;
964 }
965
966 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
967 {
968 return do_adr(s, a, gen_helper_sve_adr_p32);
969 }
970
971 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
972 {
973 return do_adr(s, a, gen_helper_sve_adr_p64);
974 }
975
976 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
977 {
978 return do_adr(s, a, gen_helper_sve_adr_s32);
979 }
980
981 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
982 {
983 return do_adr(s, a, gen_helper_sve_adr_u32);
984 }
985
986 /*
987 *** SVE Integer Misc - Unpredicated Group
988 */
989
990 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
991 {
992 static gen_helper_gvec_2 * const fns[4] = {
993 NULL,
994 gen_helper_sve_fexpa_h,
995 gen_helper_sve_fexpa_s,
996 gen_helper_sve_fexpa_d,
997 };
998 if (a->esz == 0) {
999 return false;
1000 }
1001 if (sve_access_check(s)) {
1002 unsigned vsz = vec_full_reg_size(s);
1003 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1004 vec_full_reg_offset(s, a->rn),
1005 vsz, vsz, 0, fns[a->esz]);
1006 }
1007 return true;
1008 }
1009
1010 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1011 {
1012 static gen_helper_gvec_3 * const fns[4] = {
1013 NULL,
1014 gen_helper_sve_ftssel_h,
1015 gen_helper_sve_ftssel_s,
1016 gen_helper_sve_ftssel_d,
1017 };
1018 if (a->esz == 0) {
1019 return false;
1020 }
1021 if (sve_access_check(s)) {
1022 unsigned vsz = vec_full_reg_size(s);
1023 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1024 vec_full_reg_offset(s, a->rn),
1025 vec_full_reg_offset(s, a->rm),
1026 vsz, vsz, 0, fns[a->esz]);
1027 }
1028 return true;
1029 }
1030
1031 /*
1032 *** SVE Predicate Logical Operations Group
1033 */
1034
1035 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1036 const GVecGen4 *gvec_op)
1037 {
1038 if (!sve_access_check(s)) {
1039 return true;
1040 }
1041
1042 unsigned psz = pred_gvec_reg_size(s);
1043 int dofs = pred_full_reg_offset(s, a->rd);
1044 int nofs = pred_full_reg_offset(s, a->rn);
1045 int mofs = pred_full_reg_offset(s, a->rm);
1046 int gofs = pred_full_reg_offset(s, a->pg);
1047
1048 if (psz == 8) {
1049 /* Do the operation and the flags generation in temps. */
1050 TCGv_i64 pd = tcg_temp_new_i64();
1051 TCGv_i64 pn = tcg_temp_new_i64();
1052 TCGv_i64 pm = tcg_temp_new_i64();
1053 TCGv_i64 pg = tcg_temp_new_i64();
1054
1055 tcg_gen_ld_i64(pn, cpu_env, nofs);
1056 tcg_gen_ld_i64(pm, cpu_env, mofs);
1057 tcg_gen_ld_i64(pg, cpu_env, gofs);
1058
1059 gvec_op->fni8(pd, pn, pm, pg);
1060 tcg_gen_st_i64(pd, cpu_env, dofs);
1061
1062 do_predtest1(pd, pg);
1063
1064 tcg_temp_free_i64(pd);
1065 tcg_temp_free_i64(pn);
1066 tcg_temp_free_i64(pm);
1067 tcg_temp_free_i64(pg);
1068 } else {
1069 /* The operation and flags generation is large. The computation
1070 * of the flags depends on the original contents of the guarding
1071 * predicate. If the destination overwrites the guarding predicate,
1072 * then the easiest way to get this right is to save a copy.
1073 */
1074 int tofs = gofs;
1075 if (a->rd == a->pg) {
1076 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1077 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1078 }
1079
1080 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1081 do_predtest(s, dofs, tofs, psz / 8);
1082 }
1083 return true;
1084 }
1085
1086 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1087 {
1088 tcg_gen_and_i64(pd, pn, pm);
1089 tcg_gen_and_i64(pd, pd, pg);
1090 }
1091
1092 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1093 TCGv_vec pm, TCGv_vec pg)
1094 {
1095 tcg_gen_and_vec(vece, pd, pn, pm);
1096 tcg_gen_and_vec(vece, pd, pd, pg);
1097 }
1098
1099 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1100 {
1101 static const GVecGen4 op = {
1102 .fni8 = gen_and_pg_i64,
1103 .fniv = gen_and_pg_vec,
1104 .fno = gen_helper_sve_and_pppp,
1105 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1106 };
1107 if (a->s) {
1108 return do_pppp_flags(s, a, &op);
1109 } else if (a->rn == a->rm) {
1110 if (a->pg == a->rn) {
1111 return do_mov_p(s, a->rd, a->rn);
1112 } else {
1113 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1114 }
1115 } else if (a->pg == a->rn || a->pg == a->rm) {
1116 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1117 } else {
1118 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1119 }
1120 }
1121
1122 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1123 {
1124 tcg_gen_andc_i64(pd, pn, pm);
1125 tcg_gen_and_i64(pd, pd, pg);
1126 }
1127
1128 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1129 TCGv_vec pm, TCGv_vec pg)
1130 {
1131 tcg_gen_andc_vec(vece, pd, pn, pm);
1132 tcg_gen_and_vec(vece, pd, pd, pg);
1133 }
1134
1135 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1136 {
1137 static const GVecGen4 op = {
1138 .fni8 = gen_bic_pg_i64,
1139 .fniv = gen_bic_pg_vec,
1140 .fno = gen_helper_sve_bic_pppp,
1141 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1142 };
1143 if (a->s) {
1144 return do_pppp_flags(s, a, &op);
1145 } else if (a->pg == a->rn) {
1146 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1147 } else {
1148 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1149 }
1150 }
1151
1152 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1153 {
1154 tcg_gen_xor_i64(pd, pn, pm);
1155 tcg_gen_and_i64(pd, pd, pg);
1156 }
1157
1158 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1159 TCGv_vec pm, TCGv_vec pg)
1160 {
1161 tcg_gen_xor_vec(vece, pd, pn, pm);
1162 tcg_gen_and_vec(vece, pd, pd, pg);
1163 }
1164
1165 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1166 {
1167 static const GVecGen4 op = {
1168 .fni8 = gen_eor_pg_i64,
1169 .fniv = gen_eor_pg_vec,
1170 .fno = gen_helper_sve_eor_pppp,
1171 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1172 };
1173 if (a->s) {
1174 return do_pppp_flags(s, a, &op);
1175 } else {
1176 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1177 }
1178 }
1179
1180 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1181 {
1182 tcg_gen_and_i64(pn, pn, pg);
1183 tcg_gen_andc_i64(pm, pm, pg);
1184 tcg_gen_or_i64(pd, pn, pm);
1185 }
1186
1187 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1188 TCGv_vec pm, TCGv_vec pg)
1189 {
1190 tcg_gen_and_vec(vece, pn, pn, pg);
1191 tcg_gen_andc_vec(vece, pm, pm, pg);
1192 tcg_gen_or_vec(vece, pd, pn, pm);
1193 }
1194
1195 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1196 {
1197 static const GVecGen4 op = {
1198 .fni8 = gen_sel_pg_i64,
1199 .fniv = gen_sel_pg_vec,
1200 .fno = gen_helper_sve_sel_pppp,
1201 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1202 };
1203 if (a->s) {
1204 return false;
1205 } else {
1206 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1207 }
1208 }
1209
1210 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1211 {
1212 tcg_gen_or_i64(pd, pn, pm);
1213 tcg_gen_and_i64(pd, pd, pg);
1214 }
1215
1216 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1217 TCGv_vec pm, TCGv_vec pg)
1218 {
1219 tcg_gen_or_vec(vece, pd, pn, pm);
1220 tcg_gen_and_vec(vece, pd, pd, pg);
1221 }
1222
1223 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1224 {
1225 static const GVecGen4 op = {
1226 .fni8 = gen_orr_pg_i64,
1227 .fniv = gen_orr_pg_vec,
1228 .fno = gen_helper_sve_orr_pppp,
1229 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1230 };
1231 if (a->s) {
1232 return do_pppp_flags(s, a, &op);
1233 } else if (a->pg == a->rn && a->rn == a->rm) {
1234 return do_mov_p(s, a->rd, a->rn);
1235 } else {
1236 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1237 }
1238 }
1239
1240 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1241 {
1242 tcg_gen_orc_i64(pd, pn, pm);
1243 tcg_gen_and_i64(pd, pd, pg);
1244 }
1245
1246 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1247 TCGv_vec pm, TCGv_vec pg)
1248 {
1249 tcg_gen_orc_vec(vece, pd, pn, pm);
1250 tcg_gen_and_vec(vece, pd, pd, pg);
1251 }
1252
1253 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1254 {
1255 static const GVecGen4 op = {
1256 .fni8 = gen_orn_pg_i64,
1257 .fniv = gen_orn_pg_vec,
1258 .fno = gen_helper_sve_orn_pppp,
1259 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1260 };
1261 if (a->s) {
1262 return do_pppp_flags(s, a, &op);
1263 } else {
1264 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1265 }
1266 }
1267
1268 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1269 {
1270 tcg_gen_or_i64(pd, pn, pm);
1271 tcg_gen_andc_i64(pd, pg, pd);
1272 }
1273
1274 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1275 TCGv_vec pm, TCGv_vec pg)
1276 {
1277 tcg_gen_or_vec(vece, pd, pn, pm);
1278 tcg_gen_andc_vec(vece, pd, pg, pd);
1279 }
1280
1281 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1282 {
1283 static const GVecGen4 op = {
1284 .fni8 = gen_nor_pg_i64,
1285 .fniv = gen_nor_pg_vec,
1286 .fno = gen_helper_sve_nor_pppp,
1287 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1288 };
1289 if (a->s) {
1290 return do_pppp_flags(s, a, &op);
1291 } else {
1292 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1293 }
1294 }
1295
1296 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1297 {
1298 tcg_gen_and_i64(pd, pn, pm);
1299 tcg_gen_andc_i64(pd, pg, pd);
1300 }
1301
1302 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1303 TCGv_vec pm, TCGv_vec pg)
1304 {
1305 tcg_gen_and_vec(vece, pd, pn, pm);
1306 tcg_gen_andc_vec(vece, pd, pg, pd);
1307 }
1308
1309 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1310 {
1311 static const GVecGen4 op = {
1312 .fni8 = gen_nand_pg_i64,
1313 .fniv = gen_nand_pg_vec,
1314 .fno = gen_helper_sve_nand_pppp,
1315 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1316 };
1317 if (a->s) {
1318 return do_pppp_flags(s, a, &op);
1319 } else {
1320 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1321 }
1322 }
1323
1324 /*
1325 *** SVE Predicate Misc Group
1326 */
1327
1328 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1329 {
1330 if (sve_access_check(s)) {
1331 int nofs = pred_full_reg_offset(s, a->rn);
1332 int gofs = pred_full_reg_offset(s, a->pg);
1333 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1334
1335 if (words == 1) {
1336 TCGv_i64 pn = tcg_temp_new_i64();
1337 TCGv_i64 pg = tcg_temp_new_i64();
1338
1339 tcg_gen_ld_i64(pn, cpu_env, nofs);
1340 tcg_gen_ld_i64(pg, cpu_env, gofs);
1341 do_predtest1(pn, pg);
1342
1343 tcg_temp_free_i64(pn);
1344 tcg_temp_free_i64(pg);
1345 } else {
1346 do_predtest(s, nofs, gofs, words);
1347 }
1348 }
1349 return true;
1350 }
1351
1352 /* See the ARM pseudocode DecodePredCount. */
1353 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1354 {
1355 unsigned elements = fullsz >> esz;
1356 unsigned bound;
1357
1358 switch (pattern) {
1359 case 0x0: /* POW2 */
1360 return pow2floor(elements);
1361 case 0x1: /* VL1 */
1362 case 0x2: /* VL2 */
1363 case 0x3: /* VL3 */
1364 case 0x4: /* VL4 */
1365 case 0x5: /* VL5 */
1366 case 0x6: /* VL6 */
1367 case 0x7: /* VL7 */
1368 case 0x8: /* VL8 */
1369 bound = pattern;
1370 break;
1371 case 0x9: /* VL16 */
1372 case 0xa: /* VL32 */
1373 case 0xb: /* VL64 */
1374 case 0xc: /* VL128 */
1375 case 0xd: /* VL256 */
1376 bound = 16 << (pattern - 9);
1377 break;
1378 case 0x1d: /* MUL4 */
1379 return elements - elements % 4;
1380 case 0x1e: /* MUL3 */
1381 return elements - elements % 3;
1382 case 0x1f: /* ALL */
1383 return elements;
1384 default: /* #uimm5 */
1385 return 0;
1386 }
1387 return elements >= bound ? bound : 0;
1388 }
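/* Worked example: for a 256-bit vector (fullsz = 32) and esz = 2 there
 * are 8 word elements, so POW2, VL8 and ALL all give 8, VL7 gives 7,
 * MUL3 gives 6, and VL16 gives 0 because the bound exceeds the
 * element count.
 */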
1389
1390 /* This handles all of the predicate initialization instructions,
1391 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1392 * so that decode_pred_count returns 0. For SETFFR, we will have
1393 * set RD == 16 == FFR.
1394 */
1395 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1396 {
1397 if (!sve_access_check(s)) {
1398 return true;
1399 }
1400
1401 unsigned fullsz = vec_full_reg_size(s);
1402 unsigned ofs = pred_full_reg_offset(s, rd);
1403 unsigned numelem, setsz, i;
1404 uint64_t word, lastword;
1405 TCGv_i64 t;
1406
1407 numelem = decode_pred_count(fullsz, pat, esz);
1408
1409 /* Determine what we must store into each bit, and how many. */
1410 if (numelem == 0) {
1411 lastword = word = 0;
1412 setsz = fullsz;
1413 } else {
1414 setsz = numelem << esz;
1415 lastword = word = pred_esz_masks[esz];
1416 if (setsz % 64) {
1417 lastword &= ~(-1ull << (setsz % 64));
1418 }
1419 }
1420
1421 t = tcg_temp_new_i64();
1422 if (fullsz <= 64) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs);
1425 goto done;
1426 }
1427
1428 if (word == lastword) {
1429 unsigned maxsz = size_for_gvec(fullsz / 8);
1430 unsigned oprsz = size_for_gvec(setsz / 8);
1431
1432 if (oprsz * 8 == setsz) {
1433 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1434 goto done;
1435 }
1436 if (oprsz * 8 == setsz + 8) {
1437 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1438 tcg_gen_movi_i64(t, 0);
1439 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1440 goto done;
1441 }
1442 }
1443
1444 setsz /= 8;
1445 fullsz /= 8;
1446
1447 tcg_gen_movi_i64(t, word);
1448 for (i = 0; i < setsz; i += 8) {
1449 tcg_gen_st_i64(t, cpu_env, ofs + i);
1450 }
1451 if (lastword != word) {
1452 tcg_gen_movi_i64(t, lastword);
1453 tcg_gen_st_i64(t, cpu_env, ofs + i);
1454 i += 8;
1455 }
1456 if (i < fullsz) {
1457 tcg_gen_movi_i64(t, 0);
1458 for (; i < fullsz; i += 8) {
1459 tcg_gen_st_i64(t, cpu_env, ofs + i);
1460 }
1461 }
1462
1463 done:
1464 tcg_temp_free_i64(t);
1465
1466 /* PTRUES */
1467 if (setflag) {
1468 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1469 tcg_gen_movi_i32(cpu_CF, word == 0);
1470 tcg_gen_movi_i32(cpu_VF, 0);
1471 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1472 }
1473 return true;
1474 }
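/* Worked example: PTRUE P0.S with pattern ALL on a 256-bit vector has
 * numelem = 8, so setsz = 32 predicate bits and the value stored is
 * pred_esz_masks[2] truncated to 32 bits, i.e. 0x11111111.
 */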
1475
1476 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1477 {
1478 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1479 }
1480
1481 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1482 {
1483 /* Note pat == 31 is #all, to set all elements. */
1484 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1485 }
1486
1487 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1488 {
1489 /* Note pat == 32 is #unimp, to set no elements. */
1490 return do_predset(s, 0, a->rd, 32, false);
1491 }
1492
1493 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1494 {
1495 /* The path through do_pppp_flags is complicated enough to want to avoid
1496 * duplication. Frob the arguments into the form of a predicated AND.
1497 */
1498 arg_rprr_s alt_a = {
1499 .rd = a->rd, .pg = a->pg, .s = a->s,
1500 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1501 };
1502 return trans_AND_pppp(s, &alt_a, insn);
1503 }
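/* Concretely, RDFFR Pd.B, Pg/Z behaves as AND Pd.B, Pg/Z, FFR.B, FFR.B
 * (and RDFFRS as ANDS), i.e. Pd = FFR & Pg, with the flags for the
 * setting form produced by PredTest.
 */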
1504
1505 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1506 {
1507 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1508 }
1509
1510 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1511 {
1512 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1513 }
1514
1515 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1516 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1517 TCGv_ptr, TCGv_i32))
1518 {
1519 if (!sve_access_check(s)) {
1520 return true;
1521 }
1522
1523 TCGv_ptr t_pd = tcg_temp_new_ptr();
1524 TCGv_ptr t_pg = tcg_temp_new_ptr();
1525 TCGv_i32 t;
1526 unsigned desc;
1527
1528 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1529 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1530
1531 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1533 t = tcg_const_i32(desc);
1534
1535 gen_fn(t, t_pd, t_pg, t);
1536 tcg_temp_free_ptr(t_pd);
1537 tcg_temp_free_ptr(t_pg);
1538
1539 do_pred_flags(t);
1540 tcg_temp_free_i32(t);
1541 return true;
1542 }
1543
1544 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1545 {
1546 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1547 }
1548
1549 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1550 {
1551 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1552 }
1553
1554 /*
1555 *** SVE Element Count Group
1556 */
1557
1558 /* Perform an inline saturating addition of a 32-bit value within
1559 * a 64-bit register. The second operand is known to be positive,
1560 * which halves the comparisons we must perform to bound the result.
1561 */
1562 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1563 {
1564 int64_t ibound;
1565 TCGv_i64 bound;
1566 TCGCond cond;
1567
1568 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1569 if (u) {
1570 tcg_gen_ext32u_i64(reg, reg);
1571 } else {
1572 tcg_gen_ext32s_i64(reg, reg);
1573 }
1574 if (d) {
1575 tcg_gen_sub_i64(reg, reg, val);
1576 ibound = (u ? 0 : INT32_MIN);
1577 cond = TCG_COND_LT;
1578 } else {
1579 tcg_gen_add_i64(reg, reg, val);
1580 ibound = (u ? UINT32_MAX : INT32_MAX);
1581 cond = TCG_COND_GT;
1582 }
1583 bound = tcg_const_i64(ibound);
1584 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1585 tcg_temp_free_i64(bound);
1586 }
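/* Worked example: an unsigned increment (u = true, d = false) of
 * reg = 0xfffffff0 by val = 0x20 produces the 64-bit sum 0x100000010,
 * which exceeds UINT32_MAX, so the movcond clamps it to 0xffffffff.
 */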
1587
1588 /* Similarly with 64-bit values. */
1589 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1590 {
1591 TCGv_i64 t0 = tcg_temp_new_i64();
1592 TCGv_i64 t1 = tcg_temp_new_i64();
1593 TCGv_i64 t2;
1594
1595 if (u) {
1596 if (d) {
1597 tcg_gen_sub_i64(t0, reg, val);
1598 tcg_gen_movi_i64(t1, 0);
1599 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1600 } else {
1601 tcg_gen_add_i64(t0, reg, val);
1602 tcg_gen_movi_i64(t1, -1);
1603 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1604 }
1605 } else {
1606 if (d) {
1607 /* Detect signed overflow for subtraction. */
1608 tcg_gen_xor_i64(t0, reg, val);
1609 tcg_gen_sub_i64(t1, reg, val);
1610 tcg_gen_xor_i64(reg, reg, t1);
1611 tcg_gen_and_i64(t0, t0, reg);
1612
1613 /* Bound the result. */
1614 tcg_gen_movi_i64(reg, INT64_MIN);
1615 t2 = tcg_const_i64(0);
1616 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1617 } else {
1618 /* Detect signed overflow for addition. */
1619 tcg_gen_xor_i64(t0, reg, val);
1620 tcg_gen_add_i64(reg, reg, val);
1621 tcg_gen_xor_i64(t1, reg, val);
1622 tcg_gen_andc_i64(t0, t1, t0);
1623
1624 /* Bound the result. */
1625 tcg_gen_movi_i64(t1, INT64_MAX);
1626 t2 = tcg_const_i64(0);
1627 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1628 }
1629 tcg_temp_free_i64(t2);
1630 }
1631 tcg_temp_free_i64(t0);
1632 tcg_temp_free_i64(t1);
1633 }
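/* The signed cases use the usual xor trick: for addition, overflow
 * occurred iff the operands had equal signs and the result's sign
 * differs, i.e. the sign bit of (result ^ val) & ~(reg ^ val) is set;
 * for subtraction the test is (reg ^ val) & (reg ^ result).
 */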
1634
1635 /* Similarly with a vector and a scalar operand. */
1636 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1637 TCGv_i64 val, bool u, bool d)
1638 {
1639 unsigned vsz = vec_full_reg_size(s);
1640 TCGv_ptr dptr, nptr;
1641 TCGv_i32 t32, desc;
1642 TCGv_i64 t64;
1643
1644 dptr = tcg_temp_new_ptr();
1645 nptr = tcg_temp_new_ptr();
1646 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1647 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1648 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1649
1650 switch (esz) {
1651 case MO_8:
1652 t32 = tcg_temp_new_i32();
1653 tcg_gen_extrl_i64_i32(t32, val);
1654 if (d) {
1655 tcg_gen_neg_i32(t32, t32);
1656 }
1657 if (u) {
1658 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1661 }
1662 tcg_temp_free_i32(t32);
1663 break;
1664
1665 case MO_16:
1666 t32 = tcg_temp_new_i32();
1667 tcg_gen_extrl_i64_i32(t32, val);
1668 if (d) {
1669 tcg_gen_neg_i32(t32, t32);
1670 }
1671 if (u) {
1672 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1673 } else {
1674 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1675 }
1676 tcg_temp_free_i32(t32);
1677 break;
1678
1679 case MO_32:
1680 t64 = tcg_temp_new_i64();
1681 if (d) {
1682 tcg_gen_neg_i64(t64, val);
1683 } else {
1684 tcg_gen_mov_i64(t64, val);
1685 }
1686 if (u) {
1687 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1688 } else {
1689 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1690 }
1691 tcg_temp_free_i64(t64);
1692 break;
1693
1694 case MO_64:
1695 if (u) {
1696 if (d) {
1697 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1698 } else {
1699 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1700 }
1701 } else if (d) {
1702 t64 = tcg_temp_new_i64();
1703 tcg_gen_neg_i64(t64, val);
1704 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1705 tcg_temp_free_i64(t64);
1706 } else {
1707 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1708 }
1709 break;
1710
1711 default:
1712 g_assert_not_reached();
1713 }
1714
1715 tcg_temp_free_ptr(dptr);
1716 tcg_temp_free_ptr(nptr);
1717 tcg_temp_free_i32(desc);
1718 }
1719
1720 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1721 {
1722 if (sve_access_check(s)) {
1723 unsigned fullsz = vec_full_reg_size(s);
1724 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1725 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1726 }
1727 return true;
1728 }
1729
1730 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1731 {
1732 if (sve_access_check(s)) {
1733 unsigned fullsz = vec_full_reg_size(s);
1734 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1735 int inc = numelem * a->imm * (a->d ? -1 : 1);
1736 TCGv_i64 reg = cpu_reg(s, a->rd);
1737
1738 tcg_gen_addi_i64(reg, reg, inc);
1739 }
1740 return true;
1741 }
1742
1743 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1744 uint32_t insn)
1745 {
1746 if (!sve_access_check(s)) {
1747 return true;
1748 }
1749
1750 unsigned fullsz = vec_full_reg_size(s);
1751 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752 int inc = numelem * a->imm;
1753 TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1756 if (inc == 0) {
1757 if (a->u) {
1758 tcg_gen_ext32u_i64(reg, reg);
1759 } else {
1760 tcg_gen_ext32s_i64(reg, reg);
1761 }
1762 } else {
1763 TCGv_i64 t = tcg_const_i64(inc);
1764 do_sat_addsub_32(reg, t, a->u, a->d);
1765 tcg_temp_free_i64(t);
1766 }
1767 return true;
1768 }
1769
1770 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1771 uint32_t insn)
1772 {
1773 if (!sve_access_check(s)) {
1774 return true;
1775 }
1776
1777 unsigned fullsz = vec_full_reg_size(s);
1778 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1779 int inc = numelem * a->imm;
1780 TCGv_i64 reg = cpu_reg(s, a->rd);
1781
1782 if (inc != 0) {
1783 TCGv_i64 t = tcg_const_i64(inc);
1784 do_sat_addsub_64(reg, t, a->u, a->d);
1785 tcg_temp_free_i64(t);
1786 }
1787 return true;
1788 }
1789
1790 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1791 {
1792 if (a->esz == 0) {
1793 return false;
1794 }
1795
1796 unsigned fullsz = vec_full_reg_size(s);
1797 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1798 int inc = numelem * a->imm;
1799
1800 if (inc != 0) {
1801 if (sve_access_check(s)) {
1802 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1803 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1804 vec_full_reg_offset(s, a->rn),
1805 t, fullsz, fullsz);
1806 tcg_temp_free_i64(t);
1807 }
1808 } else {
1809 do_mov_z(s, a->rd, a->rn);
1810 }
1811 return true;
1812 }
1813
1814 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1815 uint32_t insn)
1816 {
1817 if (a->esz == 0) {
1818 return false;
1819 }
1820
1821 unsigned fullsz = vec_full_reg_size(s);
1822 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1823 int inc = numelem * a->imm;
1824
1825 if (inc != 0) {
1826 if (sve_access_check(s)) {
1827 TCGv_i64 t = tcg_const_i64(inc);
1828 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1829 tcg_temp_free_i64(t);
1830 }
1831 } else {
1832 do_mov_z(s, a->rd, a->rn);
1833 }
1834 return true;
1835 }
1836
1837 /*
1838 *** SVE Bitwise Immediate Group
1839 */
1840
1841 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1842 {
1843 uint64_t imm;
1844 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1845 extract32(a->dbm, 0, 6),
1846 extract32(a->dbm, 6, 6))) {
1847 return false;
1848 }
1849 if (sve_access_check(s)) {
1850 unsigned vsz = vec_full_reg_size(s);
1851 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1852 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1853 }
1854 return true;
1855 }
1856
1857 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1858 {
1859 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1860 }
1861
1862 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1863 {
1864 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1865 }
1866
1867 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1868 {
1869 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1870 }
1871
1872 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1873 {
1874 uint64_t imm;
1875 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1876 extract32(a->dbm, 0, 6),
1877 extract32(a->dbm, 6, 6))) {
1878 return false;
1879 }
1880 if (sve_access_check(s)) {
1881 do_dupi_z(s, a->rd, imm);
1882 }
1883 return true;
1884 }
1885
1886 /*
1887 *** SVE Integer Wide Immediate - Predicated Group
1888 */
1889
1890 /* Implement all merging copies. This is used for CPY (immediate),
1891 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1892 */
1893 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1894 TCGv_i64 val)
1895 {
1896 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1897 static gen_cpy * const fns[4] = {
1898 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1899 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1900 };
1901 unsigned vsz = vec_full_reg_size(s);
1902 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1903 TCGv_ptr t_zd = tcg_temp_new_ptr();
1904 TCGv_ptr t_zn = tcg_temp_new_ptr();
1905 TCGv_ptr t_pg = tcg_temp_new_ptr();
1906
1907 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1908 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1909 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1910
1911 fns[esz](t_zd, t_zn, t_pg, val, desc);
1912
1913 tcg_temp_free_ptr(t_zd);
1914 tcg_temp_free_ptr(t_zn);
1915 tcg_temp_free_ptr(t_pg);
1916 tcg_temp_free_i32(desc);
1917 }
1918
1919 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1920 {
1921 if (a->esz == 0) {
1922 return false;
1923 }
1924 if (sve_access_check(s)) {
1925 /* Decode the VFP immediate. */
1926 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1927 TCGv_i64 t_imm = tcg_const_i64(imm);
1928 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1929 tcg_temp_free_i64(t_imm);
1930 }
1931 return true;
1932 }
1933
1934 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1935 {
1936 if (a->esz == 0 && extract32(insn, 13, 1)) {
1937 return false;
1938 }
1939 if (sve_access_check(s)) {
1940 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1941 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1942 tcg_temp_free_i64(t_imm);
1943 }
1944 return true;
1945 }
1946
1947 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1948 {
1949 static gen_helper_gvec_2i * const fns[4] = {
1950 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1951 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1952 };
1953
1954 if (a->esz == 0 && extract32(insn, 13, 1)) {
1955 return false;
1956 }
1957 if (sve_access_check(s)) {
1958 unsigned vsz = vec_full_reg_size(s);
1959 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1960 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1961 pred_full_reg_offset(s, a->pg),
1962 t_imm, vsz, vsz, 0, fns[a->esz]);
1963 tcg_temp_free_i64(t_imm);
1964 }
1965 return true;
1966 }
1967
1968 /*
1969 *** SVE Permute Extract Group
1970 */
1971
1972 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1973 {
1974 if (!sve_access_check(s)) {
1975 return true;
1976 }
1977
1978 unsigned vsz = vec_full_reg_size(s);
1979 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1980 unsigned n_siz = vsz - n_ofs;
1981 unsigned d = vec_full_reg_offset(s, a->rd);
1982 unsigned n = vec_full_reg_offset(s, a->rn);
1983 unsigned m = vec_full_reg_offset(s, a->rm);
1984
1985 /* Use host vector move insns if we have appropriate sizes
1986 * and no unfortunate overlap.
1987 */
1988 if (m != d
1989 && n_ofs == size_for_gvec(n_ofs)
1990 && n_siz == size_for_gvec(n_siz)
1991 && (d != n || n_siz <= n_ofs)) {
1992 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1993 if (n_ofs != 0) {
1994 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1995 }
1996 } else {
1997 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1998 }
1999 return true;
2000 }
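/* For example, with a 256-bit vector (vsz = 32) and imm = 16, both
 * pieces are 16 bytes and the fast path issues two gvec moves
 * (provided Zm != Zd); with imm = 8 the 24-byte remainder is not a
 * valid gvec size, so the out-of-line gen_helper_sve_ext is used.
 */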
2001
2002 /*
2003 *** SVE Permute - Unpredicated Group
2004 */
2005
2006 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2007 {
2008 if (sve_access_check(s)) {
2009 unsigned vsz = vec_full_reg_size(s);
2010 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2011 vsz, vsz, cpu_reg_sp(s, a->rn));
2012 }
2013 return true;
2014 }
2015
2016 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2017 {
2018 if ((a->imm & 0x1f) == 0) {
2019 return false;
2020 }
2021 if (sve_access_check(s)) {
2022 unsigned vsz = vec_full_reg_size(s);
2023 unsigned dofs = vec_full_reg_offset(s, a->rd);
2024 unsigned esz, index;
2025
2026 esz = ctz32(a->imm);
2027 index = a->imm >> (esz + 1);
2028
2029 if ((index << esz) < vsz) {
2030 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2031 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2032 } else {
2033 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2034 }
2035 }
2036 return true;
2037 }
2038
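/* INSR shifts the elements of the source vector up by one position and
 * inserts VAL at element 0; the element size selects the helper.
 */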
2039 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2040 {
2041 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2042 static gen_insr * const fns[4] = {
2043 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2044 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2045 };
2046 unsigned vsz = vec_full_reg_size(s);
2047 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2048 TCGv_ptr t_zd = tcg_temp_new_ptr();
2049 TCGv_ptr t_zn = tcg_temp_new_ptr();
2050
2051 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2052 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2053
2054 fns[a->esz](t_zd, t_zn, val, desc);
2055
2056 tcg_temp_free_ptr(t_zd);
2057 tcg_temp_free_ptr(t_zn);
2058 tcg_temp_free_i32(desc);
2059 }
2060
2061 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2062 {
2063 if (sve_access_check(s)) {
2064 TCGv_i64 t = tcg_temp_new_i64();
2065 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2066 do_insr_i64(s, a, t);
2067 tcg_temp_free_i64(t);
2068 }
2069 return true;
2070 }
2071
2072 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2073 {
2074 if (sve_access_check(s)) {
2075 do_insr_i64(s, a, cpu_reg(s, a->rm));
2076 }
2077 return true;
2078 }
2079
2080 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2081 {
2082 static gen_helper_gvec_2 * const fns[4] = {
2083 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2084 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2085 };
2086
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vsz, vsz, 0, fns[a->esz]);
2092 }
2093 return true;
2094 }
2095
2096 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097 {
2098 static gen_helper_gvec_3 * const fns[4] = {
2099 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2100 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2101 };
2102
2103 if (sve_access_check(s)) {
2104 unsigned vsz = vec_full_reg_size(s);
2105 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2106 vec_full_reg_offset(s, a->rn),
2107 vec_full_reg_offset(s, a->rm),
2108 vsz, vsz, 0, fns[a->esz]);
2109 }
2110 return true;
2111 }
2112
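/* [SU]UNPK{LO,HI} widen the low (h == 0) or high (h == 1) half of Zn to
 * elements of twice the width; the source offset below selects the half.
 */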
2113 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2114 {
2115 static gen_helper_gvec_2 * const fns[4][2] = {
2116 { NULL, NULL },
2117 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2118 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2119 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2120 };
2121
2122 if (a->esz == 0) {
2123 return false;
2124 }
2125 if (sve_access_check(s)) {
2126 unsigned vsz = vec_full_reg_size(s);
2127 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2128 vec_full_reg_offset(s, a->rn)
2129 + (a->h ? vsz / 2 : 0),
2130 vsz, vsz, 0, fns[a->esz][a->u]);
2131 }
2132 return true;
2133 }
2134
2135 /*
2136 *** SVE Permute - Predicates Group
2137 */
2138
2139 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2140 gen_helper_gvec_3 *fn)
2141 {
2142 if (!sve_access_check(s)) {
2143 return true;
2144 }
2145
2146 unsigned vsz = pred_full_reg_size(s);
2147
2148 /* Predicate sizes may be smaller and cannot use simd_desc.
2149 We cannot round up, as we do elsewhere, because we need
2150 the exact size for ZIP2 and REV. We retain the style for
2151 the other helpers for consistency. */
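/* Layout used below: the low bits of desc carry the predicate size in
 * bytes minus 2, and the two 2-bit fields at SIMD_DATA_SHIFT carry the
 * element size and the high_odd flag for the helper to unpack.
 */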
2152 TCGv_ptr t_d = tcg_temp_new_ptr();
2153 TCGv_ptr t_n = tcg_temp_new_ptr();
2154 TCGv_ptr t_m = tcg_temp_new_ptr();
2155 TCGv_i32 t_desc;
2156 int desc;
2157
2158 desc = vsz - 2;
2159 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2160 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2161
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2165 t_desc = tcg_const_i32(desc);
2166
2167 fn(t_d, t_n, t_m, t_desc);
2168
2169 tcg_temp_free_ptr(t_d);
2170 tcg_temp_free_ptr(t_n);
2171 tcg_temp_free_ptr(t_m);
2172 tcg_temp_free_i32(t_desc);
2173 return true;
2174 }
2175
2176 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2177 gen_helper_gvec_2 *fn)
2178 {
2179 if (!sve_access_check(s)) {
2180 return true;
2181 }
2182
2183 unsigned vsz = pred_full_reg_size(s);
2184 TCGv_ptr t_d = tcg_temp_new_ptr();
2185 TCGv_ptr t_n = tcg_temp_new_ptr();
2186 TCGv_i32 t_desc;
2187 int desc;
2188
2189 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2190 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2191
2192 /* Predicate sizes may be smaller and cannot use simd_desc.
2193 We cannot round up, as we do elsewhere, because we need
2194 the exact size for ZIP2 and REV. We retain the style for
2195 the other helpers for consistency. */
2196
2197 desc = vsz - 2;
2198 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2199 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2200 t_desc = tcg_const_i32(desc);
2201
2202 fn(t_d, t_n, t_desc);
2203
2204 tcg_temp_free_i32(t_desc);
2205 tcg_temp_free_ptr(t_d);
2206 tcg_temp_free_ptr(t_n);
2207 return true;
2208 }
2209
2210 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2211 {
2212 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2213 }
2214
2215 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2216 {
2217 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2218 }
2219
2220 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2221 {
2222 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2223 }
2224
2225 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2226 {
2227 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2228 }
2229
2230 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2231 {
2232 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2233 }
2234
2235 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2236 {
2237 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2238 }
2239
2240 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2241 {
2242 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2243 }
2244
2245 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2246 {
2247 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2248 }
2249
2250 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2251 {
2252 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2253 }
2254
2255 /*
2256 *** SVE Permute - Interleaving Group
2257 */
2258
2259 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2260 {
2261 static gen_helper_gvec_3 * const fns[4] = {
2262 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2263 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2264 };
2265
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned high_ofs = high ? vsz / 2 : 0;
2269 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2270 vec_full_reg_offset(s, a->rn) + high_ofs,
2271 vec_full_reg_offset(s, a->rm) + high_ofs,
2272 vsz, vsz, 0, fns[a->esz]);
2273 }
2274 return true;
2275 }
2276
2277 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2278 gen_helper_gvec_3 *fn)
2279 {
2280 if (sve_access_check(s)) {
2281 unsigned vsz = vec_full_reg_size(s);
2282 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2283 vec_full_reg_offset(s, a->rn),
2284 vec_full_reg_offset(s, a->rm),
2285 vsz, vsz, data, fn);
2286 }
2287 return true;
2288 }
2289
2290 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2291 {
2292 return do_zip(s, a, false);
2293 }
2294
2295 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2296 {
2297 return do_zip(s, a, true);
2298 }
2299
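/* For the second-half forms below, the element size is passed as the data
 * argument; the helpers use it as the starting byte offset, i.e. begin
 * with the odd-numbered elements.
 */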
2300 static gen_helper_gvec_3 * const uzp_fns[4] = {
2301 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2302 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2303 };
2304
2305 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2306 {
2307 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2308 }
2309
2310 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2311 {
2312 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2313 }
2314
2315 static gen_helper_gvec_3 * const trn_fns[4] = {
2316 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2317 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2318 };
2319
2320 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2321 {
2322 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2323 }
2324
2325 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2326 {
2327 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2328 }
2329
2330 /*
2331 *** SVE Permute Vector - Predicated Group
2332 */
2333
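/* COMPACT is only allocated for word and doubleword elements; the NULL
 * entries reject the smaller sizes as unallocated encodings.
 */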
2334 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2335 {
2336 static gen_helper_gvec_3 * const fns[4] = {
2337 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2338 };
2339 return do_zpz_ool(s, a, fns[a->esz]);
2340 }
2341
2342 /* Call the helper that computes the ARM LastActiveElement pseudocode
2343 * function, scaled by the element size. This includes the not-found
2344 * indication; e.g. not found for esz=3 is -8.
2345 */
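/* For example, with esz == 2 and the last active predicate bit covering
 * element 3, the helper returns 3 << 2 == 12, the byte offset of that
 * element; with no active elements it returns -(1 << 2) == -4.
 */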
2346 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2347 {
2348 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2349 * round up, as we do elsewhere, because we need the exact size.
2350 */
2351 TCGv_ptr t_p = tcg_temp_new_ptr();
2352 TCGv_i32 t_desc;
2353 unsigned vsz = pred_full_reg_size(s);
2354 unsigned desc;
2355
2356 desc = vsz - 2;
2357 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2358
2359 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2360 t_desc = tcg_const_i32(desc);
2361
2362 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2363
2364 tcg_temp_free_i32(t_desc);
2365 tcg_temp_free_ptr(t_p);
2366 }
2367
2368 /* Increment LAST to the offset of the next element in the vector,
2369 * wrapping around to 0.
2370 */
2371 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2372 {
2373 unsigned vsz = vec_full_reg_size(s);
2374
2375 tcg_gen_addi_i32(last, last, 1 << esz);
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2378 } else {
2379 TCGv_i32 max = tcg_const_i32(vsz);
2380 TCGv_i32 zero = tcg_const_i32(0);
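/* i.e. last = (last >= vsz ? 0 : last), computed without a branch. */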
2381 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
2384 }
2385 }
2386
2387 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2388 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2389 {
2390 unsigned vsz = vec_full_reg_size(s);
2391
2392 if (is_power_of_2(vsz)) {
2393 tcg_gen_andi_i32(last, last, vsz - 1);
2394 } else {
2395 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2396 TCGv_i32 zero = tcg_const_i32(0);
2397 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2398 tcg_temp_free_i32(max);
2399 tcg_temp_free_i32(zero);
2400 }
2401 }
2402
2403 /* Load an unsigned element of ESZ from BASE+OFS. */
2404 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2405 {
2406 TCGv_i64 r = tcg_temp_new_i64();
2407
2408 switch (esz) {
2409 case 0:
2410 tcg_gen_ld8u_i64(r, base, ofs);
2411 break;
2412 case 1:
2413 tcg_gen_ld16u_i64(r, base, ofs);
2414 break;
2415 case 2:
2416 tcg_gen_ld32u_i64(r, base, ofs);
2417 break;
2418 case 3:
2419 tcg_gen_ld_i64(r, base, ofs);
2420 break;
2421 default:
2422 g_assert_not_reached();
2423 }
2424 return r;
2425 }
2426
2427 /* Load an unsigned element of ESZ from RM[LAST]. */
2428 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2429 int rm, int esz)
2430 {
2431 TCGv_ptr p = tcg_temp_new_ptr();
2432 TCGv_i64 r;
2433
2434 /* Convert the offset within the vector into an offset within ENV.
2435 * The final adjustment for the vector register base
2436 * is added as a constant offset to the load.
2437 */
2438 #ifdef HOST_WORDS_BIGENDIAN
2439 /* Adjust for element ordering. See vec_reg_offset. */
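/* Sub-doubleword element 0 sits at the highest address within its
 * 64-bit backing word on such hosts; the XOR below compensates.
 */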
2440 if (esz < 3) {
2441 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2442 }
2443 #endif
2444 tcg_gen_ext_i32_ptr(p, last);
2445 tcg_gen_add_ptr(p, p, cpu_env);
2446
2447 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2448 tcg_temp_free_ptr(p);
2449
2450 return r;
2451 }
2452
2453 /* Compute CLAST for a Zreg. */
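/* CLASTA/CLASTB take the element after/at the last active one, broadcast
 * it across Zd, and fall back to copying Zn when no element is active.
 */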
2454 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2455 {
2456 TCGv_i32 last;
2457 TCGLabel *over;
2458 TCGv_i64 ele;
2459 unsigned vsz, esz = a->esz;
2460
2461 if (!sve_access_check(s)) {
2462 return true;
2463 }
2464
2465 last = tcg_temp_local_new_i32();
2466 over = gen_new_label();
2467
2468 find_last_active(s, last, esz, a->pg);
2469
2470 /* There is of course no movcond for a 2048-bit vector,
2471 * so we must branch over the actual store.
2472 */
2473 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2474
2475 if (!before) {
2476 incr_last_active(s, last, esz);
2477 }
2478
2479 ele = load_last_active(s, last, a->rm, esz);
2480 tcg_temp_free_i32(last);
2481
2482 vsz = vec_full_reg_size(s);
2483 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2484 tcg_temp_free_i64(ele);
2485
2486 /* If this insn used MOVPRFX, we may need a second move. */
2487 if (a->rd != a->rn) {
2488 TCGLabel *done = gen_new_label();
2489 tcg_gen_br(done);
2490
2491 gen_set_label(over);
2492 do_mov_z(s, a->rd, a->rn);
2493
2494 gen_set_label(done);
2495 } else {
2496 gen_set_label(over);
2497 }
2498 return true;
2499 }
2500
2501 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2502 {
2503 return do_clast_vector(s, a, false);
2504 }
2505
2506 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2507 {
2508 return do_clast_vector(s, a, true);
2509 }
2510
2511 /* Compute CLAST for a scalar. */
2512 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2513 bool before, TCGv_i64 reg_val)
2514 {
2515 TCGv_i32 last = tcg_temp_new_i32();
2516 TCGv_i64 ele, cmp, zero;
2517
2518 find_last_active(s, last, esz, pg);
2519
2520 /* Extend the original value of last prior to incrementing. */
2521 cmp = tcg_temp_new_i64();
2522 tcg_gen_ext_i32_i64(cmp, last);
2523
2524 if (!before) {
2525 incr_last_active(s, last, esz);
2526 }
2527
2528 /* The conceit here is that while last < 0 indicates not found, after
2529 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2530 * from which we can load garbage. We then discard the garbage with
2531 * a conditional move.
2532 */
2533 ele = load_last_active(s, last, rm, esz);
2534 tcg_temp_free_i32(last);
2535
2536 zero = tcg_const_i64(0);
2537 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2538
2539 tcg_temp_free_i64(zero);
2540 tcg_temp_free_i64(cmp);
2541 tcg_temp_free_i64(ele);
2542 }
2543
2544 /* Compute CLAST for a Vreg. */
2545 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2546 {
2547 if (sve_access_check(s)) {
2548 int esz = a->esz;
2549 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2550 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2551
2552 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2553 write_fp_dreg(s, a->rd, reg);
2554 tcg_temp_free_i64(reg);
2555 }
2556 return true;
2557 }
2558
2559 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2560 {
2561 return do_clast_fp(s, a, false);
2562 }
2563
2564 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2565 {
2566 return do_clast_fp(s, a, true);
2567 }
2568
2569 /* Compute CLAST for a Xreg. */
2570 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2571 {
2572 TCGv_i64 reg;
2573
2574 if (!sve_access_check(s)) {
2575 return true;
2576 }
2577
2578 reg = cpu_reg(s, a->rd);
2579 switch (a->esz) {
2580 case 0:
2581 tcg_gen_ext8u_i64(reg, reg);
2582 break;
2583 case 1:
2584 tcg_gen_ext16u_i64(reg, reg);
2585 break;
2586 case 2:
2587 tcg_gen_ext32u_i64(reg, reg);
2588 break;
2589 case 3:
2590 break;
2591 default:
2592 g_assert_not_reached();
2593 }
2594
2595 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2596 return true;
2597 }
2598
2599 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2600 {
2601 return do_clast_general(s, a, false);
2602 }
2603
2604 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2605 {
2606 return do_clast_general(s, a, true);
2607 }
2608
2609 /* Compute LAST for a scalar. */
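/* Unlike CLAST there is no fallback value: with no active elements, LASTB
 * reads the last element of the vector and LASTA wraps around to element 0.
 */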
2610 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2611 int pg, int rm, bool before)
2612 {
2613 TCGv_i32 last = tcg_temp_new_i32();
2614 TCGv_i64 ret;
2615
2616 find_last_active(s, last, esz, pg);
2617 if (before) {
2618 wrap_last_active(s, last, esz);
2619 } else {
2620 incr_last_active(s, last, esz);
2621 }
2622
2623 ret = load_last_active(s, last, rm, esz);
2624 tcg_temp_free_i32(last);
2625 return ret;
2626 }
2627
2628 /* Compute LAST for a Vreg. */
2629 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2630 {
2631 if (sve_access_check(s)) {
2632 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2633 write_fp_dreg(s, a->rd, val);
2634 tcg_temp_free_i64(val);
2635 }
2636 return true;
2637 }
2638
2639 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640 {
2641 return do_last_fp(s, a, false);
2642 }
2643
2644 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645 {
2646 return do_last_fp(s, a, true);
2647 }
2648
2649 /* Compute LAST for a Xreg. */
2650 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2651 {
2652 if (sve_access_check(s)) {
2653 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2654 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2655 tcg_temp_free_i64(val);
2656 }
2657 return true;
2658 }
2659
2660 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2661 {
2662 return do_last_general(s, a, false);
2663 }
2664
2665 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2666 {
2667 return do_last_general(s, a, true);
2668 }
2669
2670 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2671 {
2672 if (sve_access_check(s)) {
2673 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2674 }
2675 return true;
2676 }
2677
2678 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679 {
2680 if (sve_access_check(s)) {
2681 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2682 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2683 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2684 tcg_temp_free_i64(t);
2685 }
2686 return true;
2687 }
2688
2689 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691 static gen_helper_gvec_3 * const fns[4] = {
2692 NULL,
2693 gen_helper_sve_revb_h,
2694 gen_helper_sve_revb_s,
2695 gen_helper_sve_revb_d,
2696 };
2697 return do_zpz_ool(s, a, fns[a->esz]);
2698 }
2699
2700 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701 {
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 NULL,
2704 NULL,
2705 gen_helper_sve_revh_s,
2706 gen_helper_sve_revh_d,
2707 };
2708 return do_zpz_ool(s, a, fns[a->esz]);
2709 }
2710
2711 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2712 {
2713 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2714 }
2715
2716 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2717 {
2718 static gen_helper_gvec_3 * const fns[4] = {
2719 gen_helper_sve_rbit_b,
2720 gen_helper_sve_rbit_h,
2721 gen_helper_sve_rbit_s,
2722 gen_helper_sve_rbit_d,
2723 };
2724 return do_zpz_ool(s, a, fns[a->esz]);
2725 }
2726
2727 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2728 {
2729 if (sve_access_check(s)) {
2730 unsigned vsz = vec_full_reg_size(s);
2731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2732 vec_full_reg_offset(s, a->rn),
2733 vec_full_reg_offset(s, a->rm),
2734 pred_full_reg_offset(s, a->pg),
2735 vsz, vsz, a->esz, gen_helper_sve_splice);
2736 }
2737 return true;
2738 }
2739
2740 /*
2741 *** SVE Integer Compare - Vectors Group
2742 */
2743
2744 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2745 gen_helper_gvec_flags_4 *gen_fn)
2746 {
2747 TCGv_ptr pd, zn, zm, pg;
2748 unsigned vsz;
2749 TCGv_i32 t;
2750
2751 if (gen_fn == NULL) {
2752 return false;
2753 }
2754 if (!sve_access_check(s)) {
2755 return true;
2756 }
2757
2758 vsz = vec_full_reg_size(s);
2759 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2760 pd = tcg_temp_new_ptr();
2761 zn = tcg_temp_new_ptr();
2762 zm = tcg_temp_new_ptr();
2763 pg = tcg_temp_new_ptr();
2764
2765 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2766 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2767 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2768 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2769
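/* The single temp T serves both as the simd descriptor and as the return
 * slot for the NZCV flags consumed by do_pred_flags.
 */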
2770 gen_fn(t, pd, zn, zm, pg, t);
2771
2772 tcg_temp_free_ptr(pd);
2773 tcg_temp_free_ptr(zn);
2774 tcg_temp_free_ptr(zm);
2775 tcg_temp_free_ptr(pg);
2776
2777 do_pred_flags(t);
2778
2779 tcg_temp_free_i32(t);
2780 return true;
2781 }
2782
2783 #define DO_PPZZ(NAME, name) \
2784 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2785 uint32_t insn) \
2786 { \
2787 static gen_helper_gvec_flags_4 * const fns[4] = { \
2788 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2789 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2790 }; \
2791 return do_ppzz_flags(s, a, fns[a->esz]); \
2792 }
2793
2794 DO_PPZZ(CMPEQ, cmpeq)
2795 DO_PPZZ(CMPNE, cmpne)
2796 DO_PPZZ(CMPGT, cmpgt)
2797 DO_PPZZ(CMPGE, cmpge)
2798 DO_PPZZ(CMPHI, cmphi)
2799 DO_PPZZ(CMPHS, cmphs)
2800
2801 #undef DO_PPZZ
2802
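/* The wide forms compare each element of Zn against the 64-bit element of
 * Zm within which it sits, so there is no doubleword variant; the NULL
 * entry makes esz == 3 decode as unallocated.
 */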
2803 #define DO_PPZW(NAME, name) \
2804 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2805 uint32_t insn) \
2806 { \
2807 static gen_helper_gvec_flags_4 * const fns[4] = { \
2808 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2809 gen_helper_sve_##name##_ppzw_s, NULL \
2810 }; \
2811 return do_ppzz_flags(s, a, fns[a->esz]); \
2812 }
2813
2814 DO_PPZW(CMPEQ, cmpeq)
2815 DO_PPZW(CMPNE, cmpne)
2816 DO_PPZW(CMPGT, cmpgt)
2817 DO_PPZW(CMPGE, cmpge)
2818 DO_PPZW(CMPHI, cmphi)
2819 DO_PPZW(CMPHS, cmphs)
2820 DO_PPZW(CMPLT, cmplt)
2821 DO_PPZW(CMPLE, cmple)
2822 DO_PPZW(CMPLO, cmplo)
2823 DO_PPZW(CMPLS, cmpls)
2824
2825 #undef DO_PPZW
2826
2827 /*
2828 *** SVE Integer Compare - Immediate Groups
2829 */
2830
2831 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2832 gen_helper_gvec_flags_3 *gen_fn)
2833 {
2834 TCGv_ptr pd, zn, pg;
2835 unsigned vsz;
2836 TCGv_i32 t;
2837
2838 if (gen_fn == NULL) {
2839 return false;
2840 }
2841 if (!sve_access_check(s)) {
2842 return true;
2843 }
2844
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 pg = tcg_temp_new_ptr();
2850
2851 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2852 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2854
2855 gen_fn(t, pd, zn, pg, t);
2856
2857 tcg_temp_free_ptr(pd);
2858 tcg_temp_free_ptr(zn);
2859 tcg_temp_free_ptr(pg);
2860
2861 do_pred_flags(t);
2862
2863 tcg_temp_free_i32(t);
2864 return true;
2865 }
2866
2867 #define DO_PPZI(NAME, name) \
2868 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2869 uint32_t insn) \
2870 { \
2871 static gen_helper_gvec_flags_3 * const fns[4] = { \
2872 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2873 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2874 }; \
2875 return do_ppzi_flags(s, a, fns[a->esz]); \
2876 }
2877
2878 DO_PPZI(CMPEQ, cmpeq)
2879 DO_PPZI(CMPNE, cmpne)
2880 DO_PPZI(CMPGT, cmpgt)
2881 DO_PPZI(CMPGE, cmpge)
2882 DO_PPZI(CMPHI, cmphi)
2883 DO_PPZI(CMPHS, cmphs)
2884 DO_PPZI(CMPLT, cmplt)
2885 DO_PPZI(CMPLE, cmple)
2886 DO_PPZI(CMPLO, cmplo)
2887 DO_PPZI(CMPLS, cmpls)
2888
2889 #undef DO_PPZI
2890
2891 /*
2892 *** SVE Partition Break Group
2893 */
2894
2895 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2896 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2897 {
2898 if (!sve_access_check(s)) {
2899 return true;
2900 }
2901
2902 unsigned vsz = pred_full_reg_size(s);
2903
2904 /* Predicate sizes may be smaller and cannot use simd_desc. */
2905 TCGv_ptr d = tcg_temp_new_ptr();
2906 TCGv_ptr n = tcg_temp_new_ptr();
2907 TCGv_ptr m = tcg_temp_new_ptr();
2908 TCGv_ptr g = tcg_temp_new_ptr();
2909 TCGv_i32 t = tcg_const_i32(vsz - 2);
2910
2911 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2912 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2913 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2914 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2915
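/* The flag-setting (S) forms additionally return NZCV from the helper. */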
2916 if (a->s) {
2917 fn_s(t, d, n, m, g, t);
2918 do_pred_flags(t);
2919 } else {
2920 fn(d, n, m, g, t);
2921 }
2922 tcg_temp_free_ptr(d);
2923 tcg_temp_free_ptr(n);
2924 tcg_temp_free_ptr(m);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2927 return true;
2928 }
2929
2930 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2931 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2932 {
2933 if (!sve_access_check(s)) {
2934 return true;
2935 }
2936
2937 unsigned vsz = pred_full_reg_size(s);
2938
2939 /* Predicate sizes may be smaller and cannot use simd_desc. */
2940 TCGv_ptr d = tcg_temp_new_ptr();
2941 TCGv_ptr n = tcg_temp_new_ptr();
2942 TCGv_ptr g = tcg_temp_new_ptr();
2943 TCGv_i32 t = tcg_const_i32(vsz - 2);
2944
2945 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2946 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2947 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2948
2949 if (a->s) {
2950 fn_s(t, d, n, g, t);
2951 do_pred_flags(t);
2952 } else {
2953 fn(d, n, g, t);
2954 }
2955 tcg_temp_free_ptr(d);
2956 tcg_temp_free_ptr(n);
2957 tcg_temp_free_ptr(g);
2958 tcg_temp_free_i32(t);
2959 return true;
2960 }
2961
2962 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2963 {
2964 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2965 }
2966
2967 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2968 {
2969 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2970 }
2971
2972 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2973 {
2974 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2975 }
2976
2977 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2978 {
2979 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2980 }
2981
2982 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2983 {
2984 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2985 }
2986
2987 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2988 {
2989 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2990 }
2991
2992 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2993 {
2994 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2995 }
2996
2997 /*
2998 *** SVE Predicate Count Group
2999 */
3000
3001 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3002 {
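/* When the whole predicate fits in a single 64-bit load, count the active
 * elements inline with a popcount; otherwise defer to the helper.
 */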
3003 unsigned psz = pred_full_reg_size(s);
3004
3005 if (psz <= 8) {
3006 uint64_t psz_mask;
3007
3008 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3009 if (pn != pg) {
3010 TCGv_i64 g = tcg_temp_new_i64();
3011 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3012 tcg_gen_and_i64(val, val, g);
3013 tcg_temp_free_i64(g);
3014 }
3015
3016 /* Reduce the pred_esz_masks value simply to reduce the
3017 * size of the code generated here.
3018 */
3019 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3020 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3021
3022 tcg_gen_ctpop_i64(val, val);
3023 } else {
3024 TCGv_ptr t_pn = tcg_temp_new_ptr();
3025 TCGv_ptr t_pg = tcg_temp_new_ptr();
3026 unsigned desc;
3027 TCGv_i32 t_desc;
3028
3029 desc = psz - 2;
3030 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3031
3032 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3033 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3034 t_desc = tcg_const_i32(desc);
3035
3036 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3037 tcg_temp_free_ptr(t_pn);
3038 tcg_temp_free_ptr(t_pg);
3039 tcg_temp_free_i32(t_desc);
3040 }
3041 }
3042
3043 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3044 {
3045 if (sve_access_check(s)) {
3046 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3047 }
3048 return true;
3049 }
3050
3051 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3052 uint32_t insn)
3053 {
3054 if (sve_access_check(s)) {
3055 TCGv_i64 reg = cpu_reg(s, a->rd);
3056 TCGv_i64 val = tcg_temp_new_i64();
3057
3058 do_cntp(s, val, a->esz, a->pg, a->pg);
3059 if (a->d) {
3060 tcg_gen_sub_i64(reg, reg, val);
3061 } else {
3062 tcg_gen_add_i64(reg, reg, val);
3063 }
3064 tcg_temp_free_i64(val);
3065 }
3066 return true;
3067 }
3068
3069 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3070 uint32_t insn)
3071 {
3072 if (a->esz == 0) {
3073 return false;
3074 }
3075 if (sve_access_check(s)) {
3076 unsigned vsz = vec_full_reg_size(s);
3077 TCGv_i64 val = tcg_temp_new_i64();
3078 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3079
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3082 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3083 }
3084 return true;
3085 }
3086
3087 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3088 uint32_t insn)
3089 {
3090 if (sve_access_check(s)) {
3091 TCGv_i64 reg = cpu_reg(s, a->rd);
3092 TCGv_i64 val = tcg_temp_new_i64();
3093
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_32(reg, val, a->u, a->d);
3096 }
3097 return true;
3098 }
3099
3100 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3101 uint32_t insn)
3102 {
3103 if (sve_access_check(s)) {
3104 TCGv_i64 reg = cpu_reg(s, a->rd);
3105 TCGv_i64 val = tcg_temp_new_i64();
3106
3107 do_cntp(s, val, a->esz, a->pg, a->pg);
3108 do_sat_addsub_64(reg, val, a->u, a->d);
3109 }
3110 return true;
3111 }
3112
3113 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3114 uint32_t insn)
3115 {
3116 if (a->esz == 0) {
3117 return false;
3118 }
3119 if (sve_access_check(s)) {
3120 TCGv_i64 val = tcg_temp_new_i64();
3121 do_cntp(s, val, a->esz, a->pg, a->pg);
3122 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3123 }
3124 return true;
3125 }
3126
3127 /*
3128 *** SVE Integer Compare Scalars Group
3129 */
3130
3131 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3132 {
3133 if (!sve_access_check(s)) {
3134 return true;
3135 }
3136
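/* The comparison result lands in NF; when the condition does not hold,
 * the architecture wants V = !C, which the xori/andc below construct.
 */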
3137 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140 TCGv_i64 cmp = tcg_temp_new_i64();
3141
3142 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144 tcg_temp_free_i64(cmp);
3145
3146 /* VF = !NF & !CF. */
3147 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3149
3150 /* Both NF and VF actually look at bit 31. */
3151 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3153 return true;
3154 }
3155
3156 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3157 {
3158 if (!sve_access_check(s)) {
3159 return true;
3160 }
3161
3162 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3163 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3164 TCGv_i64 t0 = tcg_temp_new_i64();
3165 TCGv_i64 t1 = tcg_temp_new_i64();
3166 TCGv_i32 t2, t3;
3167 TCGv_ptr ptr;
3168 unsigned desc, vsz = vec_full_reg_size(s);
3169 TCGCond cond;
3170
3171 if (!a->sf) {
3172 if (a->u) {
3173 tcg_gen_ext32u_i64(op0, op0);
3174 tcg_gen_ext32u_i64(op1, op1);
3175 } else {
3176 tcg_gen_ext32s_i64(op0, op0);
3177 tcg_gen_ext32s_i64(op1, op1);
3178 }
3179 }
3180
3181 /* For the helper, compress the different conditions into a computation
3182 * of how many iterations for which the condition is true.
3183 *
3184 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally