1 /*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "tcg/tcg-gvec-desc.h"
26 #include "qemu/log.h"
27 #include "arm_ldst.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
32 #include "exec/log.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
35 #include "fpu/softfloat.h"
36
37
38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39 TCGv_i64, uint32_t, uint32_t);
40
41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42 TCGv_ptr, TCGv_i32);
43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44 TCGv_ptr, TCGv_ptr, TCGv_i32);
45
46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48 TCGv_ptr, TCGv_i64, TCGv_i32);
49
50 /*
51 * Helpers for extracting complex instruction fields.
52 */
53
54 /* See e.g. ASR (immediate, predicated).
55 * Returns -1 for unallocated encoding; diagnose later.
56 */
57 static int tszimm_esz(DisasContext *s, int x)
58 {
59 x >>= 3; /* discard imm3 */
60 return 31 - clz32(x);
61 }
62
63 static int tszimm_shr(DisasContext *s, int x)
64 {
65 return (16 << tszimm_esz(s, x)) - x;
66 }
67
68 /* See e.g. LSL (immediate, predicated). */
69 static int tszimm_shl(DisasContext *s, int x)
70 {
71 return x - (8 << tszimm_esz(s, x));
72 }
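/*
 * Illustrative decode (editorial note, not from the original source):
 * the immediate field is tsz:imm3.  For byte elements tsz == 0b0001,
 * so x runs from 8 to 15; tszimm_esz discards imm3 and takes the
 * highest set bit of tsz, giving esz == 0.  Then
 *   tszimm_shr: (16 << 0) - x  ->  shifts 1..8  (ASR/LSR immediates)
 *   tszimm_shl: x - (8 << 0)   ->  shifts 0..7  (LSL immediates)
 * which match the architectural ranges for byte elements.
 */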
73
74 static inline int plus1(DisasContext *s, int x)
75 {
76 return x + 1;
77 }
78
79 /* The SH bit is in bit 8. Extract the low 8 and shift. */
80 static inline int expand_imm_sh8s(DisasContext *s, int x)
81 {
82 return (int8_t)x << (x & 0x100 ? 8 : 0);
83 }
84
85 static inline int expand_imm_sh8u(DisasContext *s, int x)
86 {
87 return (uint8_t)x << (x & 0x100 ? 8 : 0);
88 }
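/*
 * Worked example (editorial): the field is SH:imm8, nine bits.  With
 * x == 0x1ff, SH is set, so the signed form gives (int8_t)0xff << 8
 * == -256 while the unsigned form gives 0xff << 8 == 0xff00.  With
 * SH clear the same imm8 yields -1 and 0xff respectively.
 */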
89
90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
91 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 */
93 static inline int msz_dtype(DisasContext *s, int msz)
94 {
95 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
96 return dtype[msz];
97 }
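/*
 * Editorial note: these are the diagonal entries of the SVE dtype
 * matrix -- the unsigned, non-extending loads are those with
 * msz == esz, i.e. dtype == msz * 5 (0, 5, 10, 15).
 */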
98
99 /*
100 * Include the generated decoder.
101 */
102
103 #include "decode-sve.c.inc"
104
105 /*
106 * Implement all of the translator functions referenced by the decoder.
107 */
108
109 /* Return the offset into CPUARMState of the predicate vector register Pn.
110 * Note for this purpose, FFR is P16.
111 */
112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
113 {
114 return offsetof(CPUARMState, vfp.pregs[regno]);
115 }
116
117 /* Return the byte size of the whole predicate register, VL / 64. */
118 static inline int pred_full_reg_size(DisasContext *s)
119 {
120 return s->sve_len >> 3;
121 }
122
123 /* Round up the size of a register to a size allowed by
124 * the tcg vector infrastructure. Any operation which uses this
125 * size may assume that the bits above pred_full_reg_size are zero,
126 * and must leave them unchanged.
127 *
128 * Note that this is not needed for the vector registers as they
129 * are always properly sized for tcg vectors.
130 */
131 static int size_for_gvec(int size)
132 {
133 if (size <= 8) {
134 return 8;
135 } else {
136 return QEMU_ALIGN_UP(size, 16);
137 }
138 }
139
140 static int pred_gvec_reg_size(DisasContext *s)
141 {
142 return size_for_gvec(pred_full_reg_size(s));
143 }
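/*
 * Example (editorial): with VL == 256 bits the vector is 32 bytes and
 * the predicate 4 bytes, so size_for_gvec rounds the predicate up to
 * the 8-byte minimum.  At VL == 2048 bits the predicate is 32 bytes,
 * already a multiple of 16, and is used unchanged.
 */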
144
145 /* Invoke an out-of-line helper on 2 Zregs. */
146 static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
147 int rd, int rn, int data)
148 {
149 unsigned vsz = vec_full_reg_size(s);
150 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn),
152 vsz, vsz, data, fn);
153 }
154
155 /* Invoke an out-of-line helper on 3 Zregs. */
156 static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
157 int rd, int rn, int rm, int data)
158 {
159 unsigned vsz = vec_full_reg_size(s);
160 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
161 vec_full_reg_offset(s, rn),
162 vec_full_reg_offset(s, rm),
163 vsz, vsz, data, fn);
164 }
165
166 /* Invoke an out-of-line helper on 2 Zregs and a predicate. */
167 static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
168 int rd, int rn, int pg, int data)
169 {
170 unsigned vsz = vec_full_reg_size(s);
171 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
172 vec_full_reg_offset(s, rn),
173 pred_full_reg_offset(s, pg),
174 vsz, vsz, data, fn);
175 }
176
177 /* Invoke an out-of-line helper on 3 Zregs and a predicate. */
178 static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
179 int rd, int rn, int rm, int pg, int data)
180 {
181 unsigned vsz = vec_full_reg_size(s);
182 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
183 vec_full_reg_offset(s, rn),
184 vec_full_reg_offset(s, rm),
185 pred_full_reg_offset(s, pg),
186 vsz, vsz, data, fn);
187 }
188
189 /* Invoke a vector expander on two Zregs. */
190 static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
191 int esz, int rd, int rn)
192 {
193 unsigned vsz = vec_full_reg_size(s);
194 gvec_fn(esz, vec_full_reg_offset(s, rd),
195 vec_full_reg_offset(s, rn), vsz, vsz);
196 }
197
198 /* Invoke a vector expander on three Zregs. */
199 static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
200 int esz, int rd, int rn, int rm)
201 {
202 unsigned vsz = vec_full_reg_size(s);
203 gvec_fn(esz, vec_full_reg_offset(s, rd),
204 vec_full_reg_offset(s, rn),
205 vec_full_reg_offset(s, rm), vsz, vsz);
206 }
207
208 /* Invoke a vector move on two Zregs. */
209 static bool do_mov_z(DisasContext *s, int rd, int rn)
210 {
211 if (sve_access_check(s)) {
212 gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
213 }
214 return true;
215 }
216
217 /* Initialize a Zreg with replications of a 64-bit immediate. */
218 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
219 {
220 unsigned vsz = vec_full_reg_size(s);
221 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
222 }
223
224 /* Invoke a vector expander on three Pregs. */
225 static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
226 int rd, int rn, int rm)
227 {
228 unsigned psz = pred_gvec_reg_size(s);
229 gvec_fn(MO_64, pred_full_reg_offset(s, rd),
230 pred_full_reg_offset(s, rn),
231 pred_full_reg_offset(s, rm), psz, psz);
232 }
233
234 /* Invoke a vector move on two Pregs. */
235 static bool do_mov_p(DisasContext *s, int rd, int rn)
236 {
237 if (sve_access_check(s)) {
238 unsigned psz = pred_gvec_reg_size(s);
239 tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
240 pred_full_reg_offset(s, rn), psz, psz);
241 }
242 return true;
243 }
244
245 /* Set the cpu flags as per a return from an SVE helper. */
246 static void do_pred_flags(TCGv_i32 t)
247 {
248 tcg_gen_mov_i32(cpu_NF, t);
249 tcg_gen_andi_i32(cpu_ZF, t, 2);
250 tcg_gen_andi_i32(cpu_CF, t, 1);
251 tcg_gen_movi_i32(cpu_VF, 0);
252 }
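/*
 * Editorial note on the mapping above: in QEMU's flag representation,
 * N is bit 31 of cpu_NF, Z is set iff cpu_ZF == 0, and C is bit 0 of
 * cpu_CF.  The helper therefore returns PredTest's "first" flag in the
 * sign bit, "some active element true" in bit 1 (so Z = none true),
 * and C = !last in bit 0.
 */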
253
254 /* Subroutines computing the ARM PredTest pseudofunction. */
255 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
256 {
257 TCGv_i32 t = tcg_temp_new_i32();
258
259 gen_helper_sve_predtest1(t, d, g);
260 do_pred_flags(t);
261 tcg_temp_free_i32(t);
262 }
263
264 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
265 {
266 TCGv_ptr dptr = tcg_temp_new_ptr();
267 TCGv_ptr gptr = tcg_temp_new_ptr();
268 TCGv_i32 t;
269
270 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
271 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
272 t = tcg_const_i32(words);
273
274 gen_helper_sve_predtest(t, dptr, gptr, t);
275 tcg_temp_free_ptr(dptr);
276 tcg_temp_free_ptr(gptr);
277
278 do_pred_flags(t);
279 tcg_temp_free_i32(t);
280 }
281
282 /* For each element size, the bits within a predicate word that are active. */
283 const uint64_t pred_esz_masks[4] = {
284 0xffffffffffffffffull, 0x5555555555555555ull,
285 0x1111111111111111ull, 0x0101010101010101ull
286 };
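/*
 * Example (editorial): predicates hold one bit per byte of the vector,
 * so for MO_16 only every second bit is significant (0x5555...) and
 * for MO_64 only every eighth (0x0101...).
 */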
287
288 /*
289 *** SVE Logical - Unpredicated Group
290 */
291
292 static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
293 {
294 if (sve_access_check(s)) {
295 gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
296 }
297 return true;
298 }
299
300 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
301 {
302 return do_zzz_fn(s, a, tcg_gen_gvec_and);
303 }
304
305 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
306 {
307 return do_zzz_fn(s, a, tcg_gen_gvec_or);
308 }
309
310 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
311 {
312 return do_zzz_fn(s, a, tcg_gen_gvec_xor);
313 }
314
315 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
316 {
317 return do_zzz_fn(s, a, tcg_gen_gvec_andc);
318 }
319
320 /*
321 *** SVE Integer Arithmetic - Unpredicated Group
322 */
323
324 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
325 {
326 return do_zzz_fn(s, a, tcg_gen_gvec_add);
327 }
328
329 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
330 {
331 return do_zzz_fn(s, a, tcg_gen_gvec_sub);
332 }
333
334 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
335 {
336 return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
337 }
338
339 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
340 {
341 return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
342 }
343
344 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
345 {
346 return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
347 }
348
349 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
350 {
351 return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
352 }
353
354 /*
355 *** SVE Integer Arithmetic - Binary Predicated Group
356 */
357
358 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
359 {
360 if (fn == NULL) {
361 return false;
362 }
363 if (sve_access_check(s)) {
364 gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
365 }
366 return true;
367 }
368
369 /* Select active elements from Zn and inactive elements from Zm,
370 * storing the result in Zd.
371 */
372 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
373 {
374 static gen_helper_gvec_4 * const fns[4] = {
375 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
376 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
377 };
378 gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
379 }
380
381 #define DO_ZPZZ(NAME, name) \
382 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \
383 { \
384 static gen_helper_gvec_4 * const fns[4] = { \
385 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
386 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
387 }; \
388 return do_zpzz_ool(s, a, fns[a->esz]); \
389 }
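/*
 * Editorial example of the macro above: DO_ZPZZ(AND, and) expands to a
 * trans_AND_zpzz whose fns[] table picks gen_helper_sve_and_zpzz_{b,h,s,d}
 * according to a->esz.
 */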
390
391 DO_ZPZZ(AND, and)
392 DO_ZPZZ(EOR, eor)
393 DO_ZPZZ(ORR, orr)
394 DO_ZPZZ(BIC, bic)
395
396 DO_ZPZZ(ADD, add)
397 DO_ZPZZ(SUB, sub)
398
399 DO_ZPZZ(SMAX, smax)
400 DO_ZPZZ(UMAX, umax)
401 DO_ZPZZ(SMIN, smin)
402 DO_ZPZZ(UMIN, umin)
403 DO_ZPZZ(SABD, sabd)
404 DO_ZPZZ(UABD, uabd)
405
406 DO_ZPZZ(MUL, mul)
407 DO_ZPZZ(SMULH, smulh)
408 DO_ZPZZ(UMULH, umulh)
409
410 DO_ZPZZ(ASR, asr)
411 DO_ZPZZ(LSR, lsr)
412 DO_ZPZZ(LSL, lsl)
413
414 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
415 {
416 static gen_helper_gvec_4 * const fns[4] = {
417 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
418 };
419 return do_zpzz_ool(s, a, fns[a->esz]);
420 }
421
422 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
423 {
424 static gen_helper_gvec_4 * const fns[4] = {
425 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
426 };
427 return do_zpzz_ool(s, a, fns[a->esz]);
428 }
429
430 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
431 {
432 if (sve_access_check(s)) {
433 do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
434 }
435 return true;
436 }
437
438 #undef DO_ZPZZ
439
440 /*
441 *** SVE Integer Arithmetic - Unary Predicated Group
442 */
443
444 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
445 {
446 if (fn == NULL) {
447 return false;
448 }
449 if (sve_access_check(s)) {
450 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
451 }
452 return true;
453 }
454
455 #define DO_ZPZ(NAME, name) \
456 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
457 { \
458 static gen_helper_gvec_3 * const fns[4] = { \
459 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
460 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
461 }; \
462 return do_zpz_ool(s, a, fns[a->esz]); \
463 }
464
465 DO_ZPZ(CLS, cls)
466 DO_ZPZ(CLZ, clz)
467 DO_ZPZ(CNT_zpz, cnt_zpz)
468 DO_ZPZ(CNOT, cnot)
469 DO_ZPZ(NOT_zpz, not_zpz)
470 DO_ZPZ(ABS, abs)
471 DO_ZPZ(NEG, neg)
472
473 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
474 {
475 static gen_helper_gvec_3 * const fns[4] = {
476 NULL,
477 gen_helper_sve_fabs_h,
478 gen_helper_sve_fabs_s,
479 gen_helper_sve_fabs_d
480 };
481 return do_zpz_ool(s, a, fns[a->esz]);
482 }
483
484 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
485 {
486 static gen_helper_gvec_3 * const fns[4] = {
487 NULL,
488 gen_helper_sve_fneg_h,
489 gen_helper_sve_fneg_s,
490 gen_helper_sve_fneg_d
491 };
492 return do_zpz_ool(s, a, fns[a->esz]);
493 }
494
495 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
496 {
497 static gen_helper_gvec_3 * const fns[4] = {
498 NULL,
499 gen_helper_sve_sxtb_h,
500 gen_helper_sve_sxtb_s,
501 gen_helper_sve_sxtb_d
502 };
503 return do_zpz_ool(s, a, fns[a->esz]);
504 }
505
506 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
507 {
508 static gen_helper_gvec_3 * const fns[4] = {
509 NULL,
510 gen_helper_sve_uxtb_h,
511 gen_helper_sve_uxtb_s,
512 gen_helper_sve_uxtb_d
513 };
514 return do_zpz_ool(s, a, fns[a->esz]);
515 }
516
517 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
518 {
519 static gen_helper_gvec_3 * const fns[4] = {
520 NULL, NULL,
521 gen_helper_sve_sxth_s,
522 gen_helper_sve_sxth_d
523 };
524 return do_zpz_ool(s, a, fns[a->esz]);
525 }
526
527 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
528 {
529 static gen_helper_gvec_3 * const fns[4] = {
530 NULL, NULL,
531 gen_helper_sve_uxth_s,
532 gen_helper_sve_uxth_d
533 };
534 return do_zpz_ool(s, a, fns[a->esz]);
535 }
536
537 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
538 {
539 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
540 }
541
542 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
543 {
544 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
545 }
546
547 #undef DO_ZPZ
548
549 /*
550 *** SVE Integer Reduction Group
551 */
552
553 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
554 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
555 gen_helper_gvec_reduc *fn)
556 {
557 unsigned vsz = vec_full_reg_size(s);
558 TCGv_ptr t_zn, t_pg;
559 TCGv_i32 desc;
560 TCGv_i64 temp;
561
562 if (fn == NULL) {
563 return false;
564 }
565 if (!sve_access_check(s)) {
566 return true;
567 }
568
569 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
570 temp = tcg_temp_new_i64();
571 t_zn = tcg_temp_new_ptr();
572 t_pg = tcg_temp_new_ptr();
573
574 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
575 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
576 fn(temp, t_zn, t_pg, desc);
577 tcg_temp_free_ptr(t_zn);
578 tcg_temp_free_ptr(t_pg);
579 tcg_temp_free_i32(desc);
580
581 write_fp_dreg(s, a->rd, temp);
582 tcg_temp_free_i64(temp);
583 return true;
584 }
585
586 #define DO_VPZ(NAME, name) \
587 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \
588 { \
589 static gen_helper_gvec_reduc * const fns[4] = { \
590 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
591 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
592 }; \
593 return do_vpz_ool(s, a, fns[a->esz]); \
594 }
595
596 DO_VPZ(ORV, orv)
597 DO_VPZ(ANDV, andv)
598 DO_VPZ(EORV, eorv)
599
600 DO_VPZ(UADDV, uaddv)
601 DO_VPZ(SMAXV, smaxv)
602 DO_VPZ(UMAXV, umaxv)
603 DO_VPZ(SMINV, sminv)
604 DO_VPZ(UMINV, uminv)
605
606 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
607 {
608 static gen_helper_gvec_reduc * const fns[4] = {
609 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
610 gen_helper_sve_saddv_s, NULL
611 };
612 return do_vpz_ool(s, a, fns[a->esz]);
613 }
614
615 #undef DO_VPZ
616
617 /*
618 *** SVE Shift by Immediate - Predicated Group
619 */
620
621 /*
622 * Copy Zn into Zd, storing zeros into inactive elements.
623 * If invert, store zeros into the active elements.
624 */
625 static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
626 int esz, bool invert)
627 {
628 static gen_helper_gvec_3 * const fns[4] = {
629 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
630 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
631 };
632
633 if (sve_access_check(s)) {
634 gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
635 }
636 return true;
637 }
638
639 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
640 gen_helper_gvec_3 *fn)
641 {
642 if (sve_access_check(s)) {
643 gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
644 }
645 return true;
646 }
647
648 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
649 {
650 static gen_helper_gvec_3 * const fns[4] = {
651 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
652 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
653 };
654 if (a->esz < 0) {
655 /* Invalid tsz encoding -- see tszimm_esz. */
656 return false;
657 }
658 /* Shift by element size is architecturally valid. For
659 arithmetic right-shift, it's the same as by one less. */
660 a->imm = MIN(a->imm, (8 << a->esz) - 1);
661 return do_zpzi_ool(s, a, fns[a->esz]);
662 }
663
664 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
665 {
666 static gen_helper_gvec_3 * const fns[4] = {
667 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
668 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
669 };
670 if (a->esz < 0) {
671 return false;
672 }
673 /* Shift by element size is architecturally valid.
674 For logical shifts, it is a zeroing operation. */
675 if (a->imm >= (8 << a->esz)) {
676 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
677 } else {
678 return do_zpzi_ool(s, a, fns[a->esz]);
679 }
680 }
681
682 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
683 {
684 static gen_helper_gvec_3 * const fns[4] = {
685 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
686 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
687 };
688 if (a->esz < 0) {
689 return false;
690 }
691 /* Shift by element size is architecturally valid.
692 For logical shifts, it is a zeroing operation. */
693 if (a->imm >= (8 << a->esz)) {
694 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
695 } else {
696 return do_zpzi_ool(s, a, fns[a->esz]);
697 }
698 }
699
700 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
701 {
702 static gen_helper_gvec_3 * const fns[4] = {
703 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
704 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
705 };
706 if (a->esz < 0) {
707 return false;
708 }
709 /* Shift by element size is architecturally valid. For arithmetic
710 right shift for division, it is a zeroing operation. */
711 if (a->imm >= (8 << a->esz)) {
712 return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
713 } else {
714 return do_zpzi_ool(s, a, fns[a->esz]);
715 }
716 }
717
718 /*
719 *** SVE Bitwise Shift - Predicated Group
720 */
721
722 #define DO_ZPZW(NAME, name) \
723 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \
724 { \
725 static gen_helper_gvec_4 * const fns[3] = { \
726 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
727 gen_helper_sve_##name##_zpzw_s, \
728 }; \
729 if (a->esz < 0 || a->esz >= 3) { \
730 return false; \
731 } \
732 return do_zpzz_ool(s, a, fns[a->esz]); \
733 }
734
735 DO_ZPZW(ASR, asr)
736 DO_ZPZW(LSR, lsr)
737 DO_ZPZW(LSL, lsl)
738
739 #undef DO_ZPZW
740
741 /*
742 *** SVE Bitwise Shift - Unpredicated Group
743 */
744
745 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
746 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
747 int64_t, uint32_t, uint32_t))
748 {
749 if (a->esz < 0) {
750 /* Invalid tsz encoding -- see tszimm_esz. */
751 return false;
752 }
753 if (sve_access_check(s)) {
754 unsigned vsz = vec_full_reg_size(s);
755 /* Shift by element size is architecturally valid. For
756 arithmetic right-shift, it's the same as by one less.
757 Otherwise it is a zeroing operation. */
758 if (a->imm >= 8 << a->esz) {
759 if (asr) {
760 a->imm = (8 << a->esz) - 1;
761 } else {
762 do_dupi_z(s, a->rd, 0);
763 return true;
764 }
765 }
766 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
767 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
768 }
769 return true;
770 }
771
772 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
773 {
774 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
775 }
776
777 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
778 {
779 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
780 }
781
782 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
783 {
784 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
785 }
786
787 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
788 {
789 if (fn == NULL) {
790 return false;
791 }
792 if (sve_access_check(s)) {
793 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
794 }
795 return true;
796 }
797
798 #define DO_ZZW(NAME, name) \
799 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \
800 { \
801 static gen_helper_gvec_3 * const fns[4] = { \
802 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
803 gen_helper_sve_##name##_zzw_s, NULL \
804 }; \
805 return do_zzw_ool(s, a, fns[a->esz]); \
806 }
807
808 DO_ZZW(ASR, asr)
809 DO_ZZW(LSR, lsr)
810 DO_ZZW(LSL, lsl)
811
812 #undef DO_ZZW
813
814 /*
815 *** SVE Integer Multiply-Add Group
816 */
817
818 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
819 gen_helper_gvec_5 *fn)
820 {
821 if (sve_access_check(s)) {
822 unsigned vsz = vec_full_reg_size(s);
823 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
824 vec_full_reg_offset(s, a->ra),
825 vec_full_reg_offset(s, a->rn),
826 vec_full_reg_offset(s, a->rm),
827 pred_full_reg_offset(s, a->pg),
828 vsz, vsz, 0, fn);
829 }
830 return true;
831 }
832
833 #define DO_ZPZZZ(NAME, name) \
834 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \
835 { \
836 static gen_helper_gvec_5 * const fns[4] = { \
837 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
838 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
839 }; \
840 return do_zpzzz_ool(s, a, fns[a->esz]); \
841 }
842
843 DO_ZPZZZ(MLA, mla)
844 DO_ZPZZZ(MLS, mls)
845
846 #undef DO_ZPZZZ
847
848 /*
849 *** SVE Index Generation Group
850 */
851
852 static void do_index(DisasContext *s, int esz, int rd,
853 TCGv_i64 start, TCGv_i64 incr)
854 {
855 unsigned vsz = vec_full_reg_size(s);
856 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
857 TCGv_ptr t_zd = tcg_temp_new_ptr();
858
859 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
860 if (esz == 3) {
861 gen_helper_sve_index_d(t_zd, start, incr, desc);
862 } else {
863 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
864 static index_fn * const fns[3] = {
865 gen_helper_sve_index_b,
866 gen_helper_sve_index_h,
867 gen_helper_sve_index_s,
868 };
869 TCGv_i32 s32 = tcg_temp_new_i32();
870 TCGv_i32 i32 = tcg_temp_new_i32();
871
872 tcg_gen_extrl_i64_i32(s32, start);
873 tcg_gen_extrl_i64_i32(i32, incr);
874 fns[esz](t_zd, s32, i32, desc);
875
876 tcg_temp_free_i32(s32);
877 tcg_temp_free_i32(i32);
878 }
879 tcg_temp_free_ptr(t_zd);
880 tcg_temp_free_i32(desc);
881 }
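/*
 * Editorial note: do_index realizes Zd.<esz>[i] = start + i * incr.
 * Only the 64-bit element size needs full i64 operands; the narrower
 * helpers take start/incr truncated to i32, as above.
 */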
882
883 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
884 {
885 if (sve_access_check(s)) {
886 TCGv_i64 start = tcg_const_i64(a->imm1);
887 TCGv_i64 incr = tcg_const_i64(a->imm2);
888 do_index(s, a->esz, a->rd, start, incr);
889 tcg_temp_free_i64(start);
890 tcg_temp_free_i64(incr);
891 }
892 return true;
893 }
894
895 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
896 {
897 if (sve_access_check(s)) {
898 TCGv_i64 start = tcg_const_i64(a->imm);
899 TCGv_i64 incr = cpu_reg(s, a->rm);
900 do_index(s, a->esz, a->rd, start, incr);
901 tcg_temp_free_i64(start);
902 }
903 return true;
904 }
905
906 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
907 {
908 if (sve_access_check(s)) {
909 TCGv_i64 start = cpu_reg(s, a->rn);
910 TCGv_i64 incr = tcg_const_i64(a->imm);
911 do_index(s, a->esz, a->rd, start, incr);
912 tcg_temp_free_i64(incr);
913 }
914 return true;
915 }
916
917 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
918 {
919 if (sve_access_check(s)) {
920 TCGv_i64 start = cpu_reg(s, a->rn);
921 TCGv_i64 incr = cpu_reg(s, a->rm);
922 do_index(s, a->esz, a->rd, start, incr);
923 }
924 return true;
925 }
926
927 /*
928 *** SVE Stack Allocation Group
929 */
930
931 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
932 {
933 if (sve_access_check(s)) {
934 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
935 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
936 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
937 }
938 return true;
939 }
940
941 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
942 {
943 if (sve_access_check(s)) {
944 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
945 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
946 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
947 }
948 return true;
949 }
950
951 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
952 {
953 if (sve_access_check(s)) {
954 TCGv_i64 reg = cpu_reg(s, a->rd);
955 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
956 }
957 return true;
958 }
959
960 /*
961 *** SVE Compute Vector Address Group
962 */
963
964 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
965 {
966 if (sve_access_check(s)) {
967 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
968 }
969 return true;
970 }
971
972 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
973 {
974 return do_adr(s, a, gen_helper_sve_adr_p32);
975 }
976
977 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
978 {
979 return do_adr(s, a, gen_helper_sve_adr_p64);
980 }
981
982 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
983 {
984 return do_adr(s, a, gen_helper_sve_adr_s32);
985 }
986
987 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
988 {
989 return do_adr(s, a, gen_helper_sve_adr_u32);
990 }
991
992 /*
993 *** SVE Integer Misc - Unpredicated Group
994 */
995
996 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
997 {
998 static gen_helper_gvec_2 * const fns[4] = {
999 NULL,
1000 gen_helper_sve_fexpa_h,
1001 gen_helper_sve_fexpa_s,
1002 gen_helper_sve_fexpa_d,
1003 };
1004 if (a->esz == 0) {
1005 return false;
1006 }
1007 if (sve_access_check(s)) {
1008 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1009 }
1010 return true;
1011 }
1012
1013 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1014 {
1015 static gen_helper_gvec_3 * const fns[4] = {
1016 NULL,
1017 gen_helper_sve_ftssel_h,
1018 gen_helper_sve_ftssel_s,
1019 gen_helper_sve_ftssel_d,
1020 };
1021 if (a->esz == 0) {
1022 return false;
1023 }
1024 if (sve_access_check(s)) {
1025 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1026 }
1027 return true;
1028 }
1029
1030 /*
1031 *** SVE Predicate Logical Operations Group
1032 */
1033
1034 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1035 const GVecGen4 *gvec_op)
1036 {
1037 if (!sve_access_check(s)) {
1038 return true;
1039 }
1040
1041 unsigned psz = pred_gvec_reg_size(s);
1042 int dofs = pred_full_reg_offset(s, a->rd);
1043 int nofs = pred_full_reg_offset(s, a->rn);
1044 int mofs = pred_full_reg_offset(s, a->rm);
1045 int gofs = pred_full_reg_offset(s, a->pg);
1046
1047 if (!a->s) {
1048 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1049 return true;
1050 }
1051
1052 if (psz == 8) {
1053 /* Do the operation and the flags generation in temps. */
1054 TCGv_i64 pd = tcg_temp_new_i64();
1055 TCGv_i64 pn = tcg_temp_new_i64();
1056 TCGv_i64 pm = tcg_temp_new_i64();
1057 TCGv_i64 pg = tcg_temp_new_i64();
1058
1059 tcg_gen_ld_i64(pn, cpu_env, nofs);
1060 tcg_gen_ld_i64(pm, cpu_env, mofs);
1061 tcg_gen_ld_i64(pg, cpu_env, gofs);
1062
1063 gvec_op->fni8(pd, pn, pm, pg);
1064 tcg_gen_st_i64(pd, cpu_env, dofs);
1065
1066 do_predtest1(pd, pg);
1067
1068 tcg_temp_free_i64(pd);
1069 tcg_temp_free_i64(pn);
1070 tcg_temp_free_i64(pm);
1071 tcg_temp_free_i64(pg);
1072 } else {
1073 /* The operation and flags generation is large. The computation
1074 * of the flags depends on the original contents of the guarding
1075 * predicate. If the destination overwrites the guarding predicate,
1076 * then the easiest way to get this right is to save a copy.
1077 */
1078 int tofs = gofs;
1079 if (a->rd == a->pg) {
1080 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1081 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1082 }
1083
1084 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1085 do_predtest(s, dofs, tofs, psz / 8);
1086 }
1087 return true;
1088 }
1089
1090 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1091 {
1092 tcg_gen_and_i64(pd, pn, pm);
1093 tcg_gen_and_i64(pd, pd, pg);
1094 }
1095
1096 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1097 TCGv_vec pm, TCGv_vec pg)
1098 {
1099 tcg_gen_and_vec(vece, pd, pn, pm);
1100 tcg_gen_and_vec(vece, pd, pd, pg);
1101 }
1102
1103 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1104 {
1105 static const GVecGen4 op = {
1106 .fni8 = gen_and_pg_i64,
1107 .fniv = gen_and_pg_vec,
1108 .fno = gen_helper_sve_and_pppp,
1109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1110 };
1111
1112 if (!a->s) {
1113 if (!sve_access_check(s)) {
1114 return true;
1115 }
1116 if (a->rn == a->rm) {
1117 if (a->pg == a->rn) {
1118 do_mov_p(s, a->rd, a->rn);
1119 } else {
1120 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
1121 }
1122 return true;
1123 } else if (a->pg == a->rn || a->pg == a->rm) {
1124 gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
1125 return true;
1126 }
1127 }
1128 return do_pppp_flags(s, a, &op);
1129 }
1130
1131 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1132 {
1133 tcg_gen_andc_i64(pd, pn, pm);
1134 tcg_gen_and_i64(pd, pd, pg);
1135 }
1136
1137 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1138 TCGv_vec pm, TCGv_vec pg)
1139 {
1140 tcg_gen_andc_vec(vece, pd, pn, pm);
1141 tcg_gen_and_vec(vece, pd, pd, pg);
1142 }
1143
1144 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1145 {
1146 static const GVecGen4 op = {
1147 .fni8 = gen_bic_pg_i64,
1148 .fniv = gen_bic_pg_vec,
1149 .fno = gen_helper_sve_bic_pppp,
1150 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1151 };
1152
1153 if (!a->s && a->pg == a->rn) {
1154 if (sve_access_check(s)) {
1155 gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
1156 }
1157 return true;
1158 }
1159 return do_pppp_flags(s, a, &op);
1160 }
1161
1162 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1163 {
1164 tcg_gen_xor_i64(pd, pn, pm);
1165 tcg_gen_and_i64(pd, pd, pg);
1166 }
1167
1168 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1169 TCGv_vec pm, TCGv_vec pg)
1170 {
1171 tcg_gen_xor_vec(vece, pd, pn, pm);
1172 tcg_gen_and_vec(vece, pd, pd, pg);
1173 }
1174
1175 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1176 {
1177 static const GVecGen4 op = {
1178 .fni8 = gen_eor_pg_i64,
1179 .fniv = gen_eor_pg_vec,
1180 .fno = gen_helper_sve_eor_pppp,
1181 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1182 };
1183 return do_pppp_flags(s, a, &op);
1184 }
1185
1186 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1187 {
1188 if (a->s) {
1189 return false;
1190 }
1191 if (sve_access_check(s)) {
1192 unsigned psz = pred_gvec_reg_size(s);
1193 tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1194 pred_full_reg_offset(s, a->pg),
1195 pred_full_reg_offset(s, a->rn),
1196 pred_full_reg_offset(s, a->rm), psz, psz);
1197 }
1198 return true;
1199 }
1200
1201 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1202 {
1203 tcg_gen_or_i64(pd, pn, pm);
1204 tcg_gen_and_i64(pd, pd, pg);
1205 }
1206
1207 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1208 TCGv_vec pm, TCGv_vec pg)
1209 {
1210 tcg_gen_or_vec(vece, pd, pn, pm);
1211 tcg_gen_and_vec(vece, pd, pd, pg);
1212 }
1213
1214 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1215 {
1216 static const GVecGen4 op = {
1217 .fni8 = gen_orr_pg_i64,
1218 .fniv = gen_orr_pg_vec,
1219 .fno = gen_helper_sve_orr_pppp,
1220 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1221 };
1222
1223 if (!a->s && a->pg == a->rn && a->rn == a->rm) {
1224 return do_mov_p(s, a->rd, a->rn);
1225 }
1226 return do_pppp_flags(s, a, &op);
1227 }
1228
1229 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1230 {
1231 tcg_gen_orc_i64(pd, pn, pm);
1232 tcg_gen_and_i64(pd, pd, pg);
1233 }
1234
1235 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1236 TCGv_vec pm, TCGv_vec pg)
1237 {
1238 tcg_gen_orc_vec(vece, pd, pn, pm);
1239 tcg_gen_and_vec(vece, pd, pd, pg);
1240 }
1241
1242 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1243 {
1244 static const GVecGen4 op = {
1245 .fni8 = gen_orn_pg_i64,
1246 .fniv = gen_orn_pg_vec,
1247 .fno = gen_helper_sve_orn_pppp,
1248 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1249 };
1250 return do_pppp_flags(s, a, &op);
1251 }
1252
1253 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1254 {
1255 tcg_gen_or_i64(pd, pn, pm);
1256 tcg_gen_andc_i64(pd, pg, pd);
1257 }
1258
1259 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1260 TCGv_vec pm, TCGv_vec pg)
1261 {
1262 tcg_gen_or_vec(vece, pd, pn, pm);
1263 tcg_gen_andc_vec(vece, pd, pg, pd);
1264 }
1265
1266 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1267 {
1268 static const GVecGen4 op = {
1269 .fni8 = gen_nor_pg_i64,
1270 .fniv = gen_nor_pg_vec,
1271 .fno = gen_helper_sve_nor_pppp,
1272 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1273 };
1274 return do_pppp_flags(s, a, &op);
1275 }
1276
1277 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1278 {
1279 tcg_gen_and_i64(pd, pn, pm);
1280 tcg_gen_andc_i64(pd, pg, pd);
1281 }
1282
1283 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1284 TCGv_vec pm, TCGv_vec pg)
1285 {
1286 tcg_gen_and_vec(vece, pd, pn, pm);
1287 tcg_gen_andc_vec(vece, pd, pg, pd);
1288 }
1289
1290 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1291 {
1292 static const GVecGen4 op = {
1293 .fni8 = gen_nand_pg_i64,
1294 .fniv = gen_nand_pg_vec,
1295 .fno = gen_helper_sve_nand_pppp,
1296 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1297 };
1298 return do_pppp_flags(s, a, &op);
1299 }
1300
1301 /*
1302 *** SVE Predicate Misc Group
1303 */
1304
1305 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1306 {
1307 if (sve_access_check(s)) {
1308 int nofs = pred_full_reg_offset(s, a->rn);
1309 int gofs = pred_full_reg_offset(s, a->pg);
1310 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1311
1312 if (words == 1) {
1313 TCGv_i64 pn = tcg_temp_new_i64();
1314 TCGv_i64 pg = tcg_temp_new_i64();
1315
1316 tcg_gen_ld_i64(pn, cpu_env, nofs);
1317 tcg_gen_ld_i64(pg, cpu_env, gofs);
1318 do_predtest1(pn, pg);
1319
1320 tcg_temp_free_i64(pn);
1321 tcg_temp_free_i64(pg);
1322 } else {
1323 do_predtest(s, nofs, gofs, words);
1324 }
1325 }
1326 return true;
1327 }
1328
1329 /* See the ARM pseudocode DecodePredCount. */
1330 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1331 {
1332 unsigned elements = fullsz >> esz;
1333 unsigned bound;
1334
1335 switch (pattern) {
1336 case 0x0: /* POW2 */
1337 return pow2floor(elements);
1338 case 0x1: /* VL1 */
1339 case 0x2: /* VL2 */
1340 case 0x3: /* VL3 */
1341 case 0x4: /* VL4 */
1342 case 0x5: /* VL5 */
1343 case 0x6: /* VL6 */
1344 case 0x7: /* VL7 */
1345 case 0x8: /* VL8 */
1346 bound = pattern;
1347 break;
1348 case 0x9: /* VL16 */
1349 case 0xa: /* VL32 */
1350 case 0xb: /* VL64 */
1351 case 0xc: /* VL128 */
1352 case 0xd: /* VL256 */
1353 bound = 16 << (pattern - 9);
1354 break;
1355 case 0x1d: /* MUL4 */
1356 return elements - elements % 4;
1357 case 0x1e: /* MUL3 */
1358 return elements - elements % 3;
1359 case 0x1f: /* ALL */
1360 return elements;
1361 default: /* #uimm5 */
1362 return 0;
1363 }
1364 return elements >= bound ? bound : 0;
1365 }
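/*
 * Worked example (editorial): for a 256-bit vector and esz == MO_32,
 * elements == 8.  Pattern VL8 gives bound 8 <= 8, hence 8; VL16 gives
 * bound 16 > 8, hence 0; MUL3 gives 8 - 8 % 3 == 6; POW2 gives 8.
 */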
1366
1367 /* This handles all of the predicate initialization instructions,
1368 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1369 * so that decode_pred_count returns 0. For SETFFR, we will have
1370 * set RD == 16 == FFR.
1371 */
1372 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1373 {
1374 if (!sve_access_check(s)) {
1375 return true;
1376 }
1377
1378 unsigned fullsz = vec_full_reg_size(s);
1379 unsigned ofs = pred_full_reg_offset(s, rd);
1380 unsigned numelem, setsz, i;
1381 uint64_t word, lastword;
1382 TCGv_i64 t;
1383
1384 numelem = decode_pred_count(fullsz, pat, esz);
1385
1386 /* Determine what we must store into each bit, and how many. */
1387 if (numelem == 0) {
1388 lastword = word = 0;
1389 setsz = fullsz;
1390 } else {
1391 setsz = numelem << esz;
1392 lastword = word = pred_esz_masks[esz];
1393 if (setsz % 64) {
1394 lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1395 }
1396 }
1397
1398 t = tcg_temp_new_i64();
1399 if (fullsz <= 64) {
1400 tcg_gen_movi_i64(t, lastword);
1401 tcg_gen_st_i64(t, cpu_env, ofs);
1402 goto done;
1403 }
1404
1405 if (word == lastword) {
1406 unsigned maxsz = size_for_gvec(fullsz / 8);
1407 unsigned oprsz = size_for_gvec(setsz / 8);
1408
1409 if (oprsz * 8 == setsz) {
1410 tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
1411 goto done;
1412 }
1413 }
1414
1415 setsz /= 8;
1416 fullsz /= 8;
1417
1418 tcg_gen_movi_i64(t, word);
1419 for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1420 tcg_gen_st_i64(t, cpu_env, ofs + i);
1421 }
1422 if (lastword != word) {
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs + i);
1425 i += 8;
1426 }
1427 if (i < fullsz) {
1428 tcg_gen_movi_i64(t, 0);
1429 for (; i < fullsz; i += 8) {
1430 tcg_gen_st_i64(t, cpu_env, ofs + i);
1431 }
1432 }
1433
1434 done:
1435 tcg_temp_free_i64(t);
1436
1437 /* PTRUES */
1438 if (setflag) {
1439 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1440 tcg_gen_movi_i32(cpu_CF, word == 0);
1441 tcg_gen_movi_i32(cpu_VF, 0);
1442 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1443 }
1444 return true;
1445 }
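/*
 * Worked example (editorial): for a 256-bit vector, esz == MO_16 and
 * pattern VL8, numelem == 8 and setsz == 16 predicate bits, so
 * word == 0x5555...5555 and lastword == 0x5555.  The whole predicate
 * is only 32 bits here, so the fullsz <= 64 path stores lastword
 * directly.
 */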
1446
1447 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1448 {
1449 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1450 }
1451
1452 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1453 {
1454 /* Note pat == 31 is #all, to set all elements. */
1455 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1456 }
1457
1458 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1459 {
1460 /* Note pat == 32 is #unimp, to set no elements. */
1461 return do_predset(s, 0, a->rd, 32, false);
1462 }
1463
1464 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1465 {
1466 /* The path through do_pppp_flags is complicated enough to want to avoid
1467 * duplication. Frob the arguments into the form of a predicated AND.
1468 */
1469 arg_rprr_s alt_a = {
1470 .rd = a->rd, .pg = a->pg, .s = a->s,
1471 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1472 };
1473 return trans_AND_pppp(s, &alt_a);
1474 }
1475
1476 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1477 {
1478 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1479 }
1480
1481 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1482 {
1483 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1484 }
1485
1486 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1487 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1488 TCGv_ptr, TCGv_i32))
1489 {
1490 if (!sve_access_check(s)) {
1491 return true;
1492 }
1493
1494 TCGv_ptr t_pd = tcg_temp_new_ptr();
1495 TCGv_ptr t_pg = tcg_temp_new_ptr();
1496 TCGv_i32 t;
1497 unsigned desc;
1498
1499 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1500 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1501
1502 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1503 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1504 t = tcg_const_i32(desc);
1505
1506 gen_fn(t, t_pd, t_pg, t);
1507 tcg_temp_free_ptr(t_pd);
1508 tcg_temp_free_ptr(t_pg);
1509
1510 do_pred_flags(t);
1511 tcg_temp_free_i32(t);
1512 return true;
1513 }
1514
1515 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1516 {
1517 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1518 }
1519
1520 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1521 {
1522 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1523 }
1524
1525 /*
1526 *** SVE Element Count Group
1527 */
1528
1529 /* Perform an inline saturating addition of a 32-bit value within
1530 * a 64-bit register. The second operand is known to be positive,
1531 * which halves the comparisons we must perform to bound the result.
1532 */
1533 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1534 {
1535 int64_t ibound;
1536 TCGv_i64 bound;
1537 TCGCond cond;
1538
1539 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1540 if (u) {
1541 tcg_gen_ext32u_i64(reg, reg);
1542 } else {
1543 tcg_gen_ext32s_i64(reg, reg);
1544 }
1545 if (d) {
1546 tcg_gen_sub_i64(reg, reg, val);
1547 ibound = (u ? 0 : INT32_MIN);
1548 cond = TCG_COND_LT;
1549 } else {
1550 tcg_gen_add_i64(reg, reg, val);
1551 ibound = (u ? UINT32_MAX : INT32_MAX);
1552 cond = TCG_COND_GT;
1553 }
1554 bound = tcg_const_i64(ibound);
1555 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1556 tcg_temp_free_i64(bound);
1557 }
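/*
 * A minimal scalar sketch of the clamp above (editorial, not used by
 * the translator), for the unsigned-increment case:
 *
 *   uint64_t r = (uint64_t)(uint32_t)reg + val;   // val > 0
 *   if (r > UINT32_MAX) {
 *       r = UINT32_MAX;      // the TCG_COND_GT movcond above
 *   }
 *
 * The decrement case clamps at 0 instead (INT32_MIN when signed).
 */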
1558
1559 /* Similarly with 64-bit values. */
1560 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1561 {
1562 TCGv_i64 t0 = tcg_temp_new_i64();
1563 TCGv_i64 t1 = tcg_temp_new_i64();
1564 TCGv_i64 t2;
1565
1566 if (u) {
1567 if (d) {
1568 tcg_gen_sub_i64(t0, reg, val);
1569 tcg_gen_movi_i64(t1, 0);
1570 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1571 } else {
1572 tcg_gen_add_i64(t0, reg, val);
1573 tcg_gen_movi_i64(t1, -1);
1574 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1575 }
1576 } else {
1577 if (d) {
1578 /* Detect signed overflow for subtraction. */
1579 tcg_gen_xor_i64(t0, reg, val);
1580 tcg_gen_sub_i64(t1, reg, val);
1581 tcg_gen_xor_i64(reg, reg, t1);
1582 tcg_gen_and_i64(t0, t0, reg);
1583
1584 /* Bound the result. */
1585 tcg_gen_movi_i64(reg, INT64_MIN);
1586 t2 = tcg_const_i64(0);
1587 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1588 } else {
1589 /* Detect signed overflow for addition. */
1590 tcg_gen_xor_i64(t0, reg, val);
1591 tcg_gen_add_i64(reg, reg, val);
1592 tcg_gen_xor_i64(t1, reg, val);
1593 tcg_gen_andc_i64(t0, t1, t0);
1594
1595 /* Bound the result. */
1596 tcg_gen_movi_i64(t1, INT64_MAX);
1597 t2 = tcg_const_i64(0);
1598 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1599 }
1600 tcg_temp_free_i64(t2);
1601 }
1602 tcg_temp_free_i64(t0);
1603 tcg_temp_free_i64(t1);
1604 }
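/*
 * Editorial note: since val is known positive, addition can only
 * overflow toward INT64_MAX and subtraction toward INT64_MIN.  The XOR
 * sequences leave the sign bit of t0 set exactly when signed overflow
 * occurred: for addition, "operands agreed in sign but the sum does
 * not"; for subtraction, "operands disagreed and the difference's sign
 * differs from reg's".  The final movcond tests t0 < 0 to select the
 * bound.
 */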
1605
1606 /* Similarly with a vector and a scalar operand. */
1607 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1608 TCGv_i64 val, bool u, bool d)
1609 {
1610 unsigned vsz = vec_full_reg_size(s);
1611 TCGv_ptr dptr, nptr;
1612 TCGv_i32 t32, desc;
1613 TCGv_i64 t64;
1614
1615 dptr = tcg_temp_new_ptr();
1616 nptr = tcg_temp_new_ptr();
1617 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1618 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1619 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1620
1621 switch (esz) {
1622 case MO_8:
1623 t32 = tcg_temp_new_i32();
1624 tcg_gen_extrl_i64_i32(t32, val);
1625 if (d) {
1626 tcg_gen_neg_i32(t32, t32);
1627 }
1628 if (u) {
1629 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1630 } else {
1631 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1632 }
1633 tcg_temp_free_i32(t32);
1634 break;
1635
1636 case MO_16:
1637 t32 = tcg_temp_new_i32();
1638 tcg_gen_extrl_i64_i32(t32, val);
1639 if (d) {
1640 tcg_gen_neg_i32(t32, t32);
1641 }
1642 if (u) {
1643 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1644 } else {
1645 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1646 }
1647 tcg_temp_free_i32(t32);
1648 break;
1649
1650 case MO_32:
1651 t64 = tcg_temp_new_i64();
1652 if (d) {
1653 tcg_gen_neg_i64(t64, val);
1654 } else {
1655 tcg_gen_mov_i64(t64, val);
1656 }
1657 if (u) {
1658 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1659 } else {
1660 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1661 }
1662 tcg_temp_free_i64(t64);
1663 break;
1664
1665 case MO_64:
1666 if (u) {
1667 if (d) {
1668 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1669 } else {
1670 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1671 }
1672 } else if (d) {
1673 t64 = tcg_temp_new_i64();
1674 tcg_gen_neg_i64(t64, val);
1675 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1676 tcg_temp_free_i64(t64);
1677 } else {
1678 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1679 }
1680 break;
1681
1682 default:
1683 g_assert_not_reached();
1684 }
1685
1686 tcg_temp_free_ptr(dptr);
1687 tcg_temp_free_ptr(nptr);
1688 tcg_temp_free_i32(desc);
1689 }
1690
1691 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1692 {
1693 if (sve_access_check(s)) {
1694 unsigned fullsz = vec_full_reg_size(s);
1695 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1696 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1697 }
1698 return true;
1699 }
1700
1701 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1702 {
1703 if (sve_access_check(s)) {
1704 unsigned fullsz = vec_full_reg_size(s);
1705 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1706 int inc = numelem * a->imm * (a->d ? -1 : 1);
1707 TCGv_i64 reg = cpu_reg(s, a->rd);
1708
1709 tcg_gen_addi_i64(reg, reg, inc);
1710 }
1711 return true;
1712 }
1713
1714 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1715 {
1716 if (!sve_access_check(s)) {
1717 return true;
1718 }
1719
1720 unsigned fullsz = vec_full_reg_size(s);
1721 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1722 int inc = numelem * a->imm;
1723 TCGv_i64 reg = cpu_reg(s, a->rd);
1724
1725 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1726 if (inc == 0) {
1727 if (a->u) {
1728 tcg_gen_ext32u_i64(reg, reg);
1729 } else {
1730 tcg_gen_ext32s_i64(reg, reg);
1731 }
1732 } else {
1733 TCGv_i64 t = tcg_const_i64(inc);
1734 do_sat_addsub_32(reg, t, a->u, a->d);
1735 tcg_temp_free_i64(t);
1736 }
1737 return true;
1738 }
1739
1740 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1741 {
1742 if (!sve_access_check(s)) {
1743 return true;
1744 }
1745
1746 unsigned fullsz = vec_full_reg_size(s);
1747 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1748 int inc = numelem * a->imm;
1749 TCGv_i64 reg = cpu_reg(s, a->rd);
1750
1751 if (inc != 0) {
1752 TCGv_i64 t = tcg_const_i64(inc);
1753 do_sat_addsub_64(reg, t, a->u, a->d);
1754 tcg_temp_free_i64(t);
1755 }
1756 return true;
1757 }
1758
1759 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1760 {
1761 if (a->esz == 0) {
1762 return false;
1763 }
1764
1765 unsigned fullsz = vec_full_reg_size(s);
1766 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1767 int inc = numelem * a->imm;
1768
1769 if (inc != 0) {
1770 if (sve_access_check(s)) {
1771 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1772 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1773 vec_full_reg_offset(s, a->rn),
1774 t, fullsz, fullsz);
1775 tcg_temp_free_i64(t);
1776 }
1777 } else {
1778 do_mov_z(s, a->rd, a->rn);
1779 }
1780 return true;
1781 }
1782
1783 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1784 {
1785 if (a->esz == 0) {
1786 return false;
1787 }
1788
1789 unsigned fullsz = vec_full_reg_size(s);
1790 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1791 int inc = numelem * a->imm;
1792
1793 if (inc != 0) {
1794 if (sve_access_check(s)) {
1795 TCGv_i64 t = tcg_const_i64(inc);
1796 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1797 tcg_temp_free_i64(t);
1798 }
1799 } else {
1800 do_mov_z(s, a->rd, a->rn);
1801 }
1802 return true;
1803 }
1804
1805 /*
1806 *** SVE Bitwise Immediate Group
1807 */
1808
1809 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1810 {
1811 uint64_t imm;
1812 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1813 extract32(a->dbm, 0, 6),
1814 extract32(a->dbm, 6, 6))) {
1815 return false;
1816 }
1817 if (sve_access_check(s)) {
1818 unsigned vsz = vec_full_reg_size(s);
1819 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1820 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1821 }
1822 return true;
1823 }
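/*
 * Editorial note: this reuses the AArch64 bitwise-immediate decoder,
 * with dbm split as N (bit 12), immr (bits 11:6), imms (bits 5:0).
 * E.g. N=1, immr=0, imms=0 selects a 64-bit element containing a
 * single set bit, decoding to the mask 0x0000000000000001.
 */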
1824
1825 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1826 {
1827 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1828 }
1829
1830 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1831 {
1832 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1833 }
1834
1835 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1836 {
1837 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1838 }
1839
1840 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1841 {
1842 uint64_t imm;
1843 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1844 extract32(a->dbm, 0, 6),
1845 extract32(a->dbm, 6, 6))) {
1846 return false;
1847 }
1848 if (sve_access_check(s)) {
1849 do_dupi_z(s, a->rd, imm);
1850 }
1851 return true;
1852 }
1853
1854 /*
1855 *** SVE Integer Wide Immediate - Predicated Group
1856 */
1857
1858 /* Implement all merging copies. This is used for CPY (immediate),
1859 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1860 */
1861 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1862 TCGv_i64 val)
1863 {
1864 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1865 static gen_cpy * const fns[4] = {
1866 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1867 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1868 };
1869 unsigned vsz = vec_full_reg_size(s);
1870 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1871 TCGv_ptr t_zd = tcg_temp_new_ptr();
1872 TCGv_ptr t_zn = tcg_temp_new_ptr();
1873 TCGv_ptr t_pg = tcg_temp_new_ptr();
1874
1875 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1876 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1877 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1878
1879 fns[esz](t_zd, t_zn, t_pg, val, desc);
1880
1881 tcg_temp_free_ptr(t_zd);
1882 tcg_temp_free_ptr(t_zn);
1883 tcg_temp_free_ptr(t_pg);
1884 tcg_temp_free_i32(desc);
1885 }
1886
1887 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1888 {
1889 if (a->esz == 0) {
1890 return false;
1891 }
1892 if (sve_access_check(s)) {
1893 /* Decode the VFP immediate. */
1894 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1895 TCGv_i64 t_imm = tcg_const_i64(imm);
1896 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1897 tcg_temp_free_i64(t_imm);
1898 }
1899 return true;
1900 }
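/*
 * Editorial example: vfp_expand_imm follows the architectural
 * VFPExpandImm pseudocode, so imm8 == 0x70 decodes to +1.0 at any
 * element size; the constant is then broadcast via the merging copy.
 */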
1901
1902 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1903 {
1904 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1905 return false;
1906 }
1907 if (sve_access_check(s)) {
1908 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1909 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1910 tcg_temp_free_i64(t_imm);
1911 }
1912 return true;
1913 }
1914
1915 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1916 {
1917 static gen_helper_gvec_2i * const fns[4] = {
1918 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1919 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1920 };
1921
1922 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1923 return false;
1924 }
1925 if (sve_access_check(s)) {
1926 unsigned vsz = vec_full_reg_size(s);
1927 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1928 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1929 pred_full_reg_offset(s, a->pg),
1930 t_imm, vsz, vsz, 0, fns[a->esz]);
1931 tcg_temp_free_i64(t_imm);
1932 }
1933 return true;
1934 }
1935
1936 /*
1937 *** SVE Permute Extract Group
1938 */
1939
1940 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1941 {
1942 if (!sve_access_check(s)) {
1943 return true;
1944 }
1945
1946 unsigned vsz = vec_full_reg_size(s);
1947 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1948 unsigned n_siz = vsz - n_ofs;
1949 unsigned d = vec_full_reg_offset(s, a->rd);
1950 unsigned n = vec_full_reg_offset(s, a->rn);
1951 unsigned m = vec_full_reg_offset(s, a->rm);
1952
1953 /* Use host vector move insns if we have appropriate sizes
1954 * and no unfortunate overlap.
1955 */
1956 if (m != d
1957 && n_ofs == size_for_gvec(n_ofs)
1958 && n_siz == size_for_gvec(n_siz)
1959 && (d != n || n_siz <= n_ofs)) {
1960 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1961 if (n_ofs != 0) {
1962 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1963 }
1964 } else {
1965 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1966 }
1967 return true;
1968 }
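/*
 * Worked example (editorial): with vsz == 32 and imm == 16, the fast
 * path (when the overlap checks pass) copies Zn bytes 16..31 into Zd
 * bytes 0..15, then Zm bytes 0..15 into Zd bytes 16..31 -- EXT's
 * byte-wise concatenate and shift.
 */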
1969
1970 /*
1971 *** SVE Permute - Unpredicated Group
1972 */
1973
1974 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
1975 {
1976 if (sve_access_check(s)) {
1977 unsigned vsz = vec_full_reg_size(s);
1978 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
1979 vsz, vsz, cpu_reg_sp(s, a->rn));
1980 }
1981 return true;
1982 }
1983
1984 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
1985 {
1986 if ((a->imm & 0x1f) == 0) {
1987 return false;
1988 }
1989 if (sve_access_check(s)) {
1990 unsigned vsz = vec_full_reg_size(s);
1991 unsigned dofs = vec_full_reg_offset(s, a->rd);
1992 unsigned esz, index;
1993
1994 esz = ctz32(a->imm);
1995 index = a->imm >> (esz + 1);
1996
1997 if ((index << esz) < vsz) {
1998 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
1999 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2000 } else {
2001 /*
2002 * While dup_mem handles 128-bit elements, dup_imm does not.
2003 * Thankfully element size doesn't matter for splatting zero.
2004 */
2005 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2006 }
2007 }
2008 return true;
2009 }
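/*
 * Worked example (editorial): imm == 0x16 (0b10110) has ctz == 1, so
 * esz == MO_16 and index == 0x16 >> 2 == 5, duplicating Zn.H[5].  When
 * the indexed element lies beyond this cpu's vector length the result
 * is zero, hence the dup_imm(0) path above.
 */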
2010
2011 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2012 {
2013 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2014 static gen_insr * const fns[4] = {
2015 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2016 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2017 };
2018 unsigned vsz = vec_full_reg_size(s);
2019 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2020 TCGv_ptr t_zd = tcg_temp_new_ptr();
2021 TCGv_ptr t_zn = tcg_temp_new_ptr();
2022
2023 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2024 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2025
2026 fns[a->esz](t_zd, t_zn, val, desc);
2027
2028 tcg_temp_free_ptr(t_zd);
2029 tcg_temp_free_ptr(t_zn);
2030 tcg_temp_free_i32(desc);
2031 }
2032
2033 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2034 {
2035 if (sve_access_check(s)) {
2036 TCGv_i64 t = tcg_temp_new_i64();
2037 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2038 do_insr_i64(s, a, t);
2039 tcg_temp_free_i64(t);
2040 }
2041 return true;
2042 }
2043
2044 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2045 {
2046 if (sve_access_check(s)) {
2047 do_insr_i64(s, a, cpu_reg(s, a->rm));
2048 }
2049 return true;
2050 }
2051
2052 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2053 {
2054 static gen_helper_gvec_2 * const fns[4] = {
2055 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2056 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2057 };
2058
2059 if (sve_access_check(s)) {
2060 gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
2061 }
2062 return true;
2063 }
2064
2065 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2066 {
2067 static gen_helper_gvec_3 * const fns[4] = {
2068 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2069 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2070 };
2071
2072 if (sve_access_check(s)) {
2073 gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2074 }
2075 return true;
2076 }
2077
2078 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2079 {
2080 static gen_helper_gvec_2 * const fns[4][2] = {
2081 { NULL, NULL },
2082 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2083 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2084 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2085 };
2086
2087 if (a->esz == 0) {
2088 return false;
2089 }
2090 if (sve_access_check(s)) {
2091 unsigned vsz = vec_full_reg_size(s);
2092 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2093 vec_full_reg_offset(s, a->rn)
2094 + (a->h ? vsz / 2 : 0),
2095 vsz, vsz, 0, fns[a->esz][a->u]);
2096 }
2097 return true;
2098 }
2099
2100 /*
2101 *** SVE Permute - Predicates Group
2102 */
2103
2104 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2105 gen_helper_gvec_3 *fn)
2106 {
2107 if (!sve_access_check(s)) {
2108 return true;
2109 }
2110
2111 unsigned vsz = pred_full_reg_size(s);
2112
2113 /* Predicate sizes may be smaller and cannot use simd_desc.
2114 We cannot round up, as we do elsewhere, because we need
2115 the exact size for ZIP2 and REV. We retain the style for
2116 the other helpers for consistency. */
2117 TCGv_ptr t_d = tcg_temp_new_ptr();
2118 TCGv_ptr t_n = tcg_temp_new_ptr();
2119 TCGv_ptr t_m = tcg_temp_new_ptr();
2120 TCGv_i32 t_desc;
2121 int desc;
2122
2123 desc = vsz - 2;
2124 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2125 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
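/*
 * Worked example (illustrative): at the maximum VL of 2048 bits,
 * pred_full_reg_size is 32, so the size field holds 30; esz and
 * high_odd land in the first two SIMD_DATA bit fields. The helpers
 * are written to add the 2 back when decoding the descriptor.
 */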
2126
2127 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2128 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2129 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2130 t_desc = tcg_const_i32(desc);
2131
2132 fn(t_d, t_n, t_m, t_desc);
2133
2134 tcg_temp_free_ptr(t_d);
2135 tcg_temp_free_ptr(t_n);
2136 tcg_temp_free_ptr(t_m);
2137 tcg_temp_free_i32(t_desc);
2138 return true;
2139 }
2140
2141 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2142 gen_helper_gvec_2 *fn)
2143 {
2144 if (!sve_access_check(s)) {
2145 return true;
2146 }
2147
2148 unsigned vsz = pred_full_reg_size(s);
2149 TCGv_ptr t_d = tcg_temp_new_ptr();
2150 TCGv_ptr t_n = tcg_temp_new_ptr();
2151 TCGv_i32 t_desc;
2152 int desc;
2153
2154 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2155 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2156
2157 /* Predicate sizes may be smaller and cannot use simd_desc.
2158 We cannot round up, as we do elsewhere, because we need
2159 the exact size for ZIP2 and REV. We retain the style for
2160 the other helpers for consistency. */
2161
2162 desc = vsz - 2;
2163 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2164 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2165 t_desc = tcg_const_i32(desc);
2166
2167 fn(t_d, t_n, t_desc);
2168
2169 tcg_temp_free_i32(t_desc);
2170 tcg_temp_free_ptr(t_d);
2171 tcg_temp_free_ptr(t_n);
2172 return true;
2173 }
2174
2175 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2176 {
2177 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2178 }
2179
2180 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2181 {
2182 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2183 }
2184
2185 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2186 {
2187 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2188 }
2189
2190 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2191 {
2192 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2193 }
2194
2195 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2196 {
2197 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2198 }
2199
2200 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2201 {
2202 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2203 }
2204
2205 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2206 {
2207 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2208 }
2209
2210 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2211 {
2212 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2213 }
2214
2215 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2216 {
2217 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2218 }
2219
2220 /*
2221 *** SVE Permute - Interleaving Group
2222 */
2223
2224 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2225 {
2226 static gen_helper_gvec_3 * const fns[4] = {
2227 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2228 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2229 };
2230
2231 if (sve_access_check(s)) {
2232 unsigned vsz = vec_full_reg_size(s);
2233 unsigned high_ofs = high ? vsz / 2 : 0;
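/* For ZIP2, both source operands are read from the high halves. */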
2234 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2235 vec_full_reg_offset(s, a->rn) + high_ofs,
2236 vec_full_reg_offset(s, a->rm) + high_ofs,
2237 vsz, vsz, 0, fns[a->esz]);
2238 }
2239 return true;
2240 }
2241
2242 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2243 gen_helper_gvec_3 *fn)
2244 {
2245 if (sve_access_check(s)) {
2246 gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
2247 }
2248 return true;
2249 }
2250
2251 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2252 {
2253 return do_zip(s, a, false);
2254 }
2255
2256 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2257 {
2258 return do_zip(s, a, true);
2259 }
2260
2261 static gen_helper_gvec_3 * const uzp_fns[4] = {
2262 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2263 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2264 };
2265
2266 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2267 {
2268 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2269 }
2270
2271 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2272 {
2273 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2274 }
2275
2276 static gen_helper_gvec_3 * const trn_fns[4] = {
2277 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2278 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2279 };
2280
2281 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2282 {
2283 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2284 }
2285
2286 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2287 {
2288 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2289 }
2290
2291 /*
2292 *** SVE Permute Vector - Predicated Group
2293 */
2294
2295 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2296 {
2297 static gen_helper_gvec_3 * const fns[4] = {
2298 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2299 };
2300 return do_zpz_ool(s, a, fns[a->esz]);
2301 }
2302
2303 /* Call the helper that computes the ARM LastActiveElement pseudocode
2304 * function, scaled by the element size. This includes the not-found
2305 * indication; e.g. not-found for esz=3 is -8.
2306 */
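/*
 * E.g. (illustrative): for esz = 2, no active element yields -4,
 * while a last active element at index 3 yields 12, the byte offset
 * of that element within the vector register.
 */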
2307 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2308 {
2309 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2310 * round up, as we do elsewhere, because we need the exact size.
2311 */
2312 TCGv_ptr t_p = tcg_temp_new_ptr();
2313 TCGv_i32 t_desc;
2314 unsigned vsz = pred_full_reg_size(s);
2315 unsigned desc;
2316
2317 desc = vsz - 2;
2318 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2319
2320 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2321 t_desc = tcg_const_i32(desc);
2322
2323 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2324
2325 tcg_temp_free_i32(t_desc);
2326 tcg_temp_free_ptr(t_p);
2327 }
2328
2329 /* Increment LAST to the offset of the next element in the vector,
2330 * wrapping around to 0.
2331 */
2332 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2333 {
2334 unsigned vsz = vec_full_reg_size(s);
2335
2336 tcg_gen_addi_i32(last, last, 1 << esz);
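/*
 * Wrap to zero at the end of the vector. A power-of-2 size can use
 * a simple mask; otherwise (e.g. vsz = 48 for an illustrative
 * 384-bit VL) a compare-and-select against vsz is required.
 */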
2337 if (is_power_of_2(vsz)) {
2338 tcg_gen_andi_i32(last, last, vsz - 1);
2339 } else {
2340 TCGv_i32 max = tcg_const_i32(vsz);
2341 TCGv_i32 zero = tcg_const_i32(0);
2342 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2343 tcg_temp_free_i32(max);
2344 tcg_temp_free_i32(zero);
2345 }
2346 }
2347
2348 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2349 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2350 {
2351 unsigned vsz = vec_full_reg_size(s);
2352
2353 if (is_power_of_2(vsz)) {
2354 tcg_gen_andi_i32(last, last, vsz - 1);
2355 } else {
2356 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2357 TCGv_i32 zero = tcg_const_i32(0);
2358 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2359 tcg_temp_free_i32(max);
2360 tcg_temp_free_i32(zero);
2361 }
2362 }
2363
2364 /* Load an unsigned element of ESZ from BASE+OFS. */
2365 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2366 {
2367 TCGv_i64 r = tcg_temp_new_i64();
2368
2369 switch (esz) {
2370 case 0:
2371 tcg_gen_ld8u_i64(r, base, ofs);
2372 break;
2373 case 1:
2374 tcg_gen_ld16u_i64(r, base, ofs);
2375 break;
2376 case 2:
2377 tcg_gen_ld32u_i64(r, base, ofs);
2378 break;
2379 case 3:
2380 tcg_gen_ld_i64(r, base, ofs);
2381 break;
2382 default:
2383 g_assert_not_reached();
2384 }
2385 return r;
2386 }
2387
2388 /* Load an unsigned element of ESZ from RM[LAST]. */
2389 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2390 int rm, int esz)
2391 {
2392 TCGv_ptr p = tcg_temp_new_ptr();
2393 TCGv_i64 r;
2394
2395 /* Convert the offset within the vector into an offset within ENV.
2396 * The final adjustment for the vector register base
2397 * is added via constant offset to the load.
2398 */
2399 #ifdef HOST_WORDS_BIGENDIAN
2400 /* Adjust for element ordering. See vec_reg_offset. */
2401 if (esz < 3) {
2402 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2403 }
2404 #endif
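/*
 * Illustrative: on a big-endian host, a byte element (esz = 0) at
 * offset 5 within its 64-bit chunk actually lives at offset 5 ^ 7 = 2
 * of that chunk in env; the XOR above applies that correction.
 */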
2405 tcg_gen_ext_i32_ptr(p, last);
2406 tcg_gen_add_ptr(p, p, cpu_env);
2407
2408 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2409 tcg_temp_free_ptr(p);
2410
2411 return r;
2412 }
2413
2414 /* Compute CLAST for a Zreg. */
2415 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2416 {
2417 TCGv_i32 last;
2418 TCGLabel *over;
2419 TCGv_i64 ele;
2420 unsigned vsz, esz = a->esz;
2421
2422 if (!sve_access_check(s)) {
2423 return true;
2424 }
2425
2426 last = tcg_temp_local_new_i32();
2427 over = gen_new_label();
2428
2429 find_last_active(s, last, esz, a->pg);
2430
2431 /* There is of course no movcond for a 2048-bit vector,
2432 * so we must branch over the actual store.
2433 */
2434 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2435
2436 if (!before) {
2437 incr_last_active(s, last, esz);
2438 }
2439
2440 ele = load_last_active(s, last, a->rm, esz);
2441 tcg_temp_free_i32(last);
2442
2443 vsz = vec_full_reg_size(s);
2444 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2445 tcg_temp_free_i64(ele);
2446
2447 /* If this insn used MOVPRFX, we may need a second move. */
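/*
 * That is: with no active elements the dup above was skipped, and
 * the architectural result is the unmodified first source, so copy
 * rn into rd before falling through.
 */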
2448 if (a->rd != a->rn) {
2449 TCGLabel *done = gen_new_label();
2450 tcg_gen_br(done);
2451
2452 gen_set_label(over);
2453 do_mov_z(s, a->rd, a->rn);
2454
2455 gen_set_label(done);
2456 } else {
2457 gen_set_label(over);
2458 }
2459 return true;
2460 }
2461
2462 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2463 {
2464 return do_clast_vector(s, a, false);
2465 }
2466
2467 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2468 {
2469 return do_clast_vector(s, a, true);
2470 }
2471
2472 /* Compute CLAST for a scalar. */
2473 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2474 bool before, TCGv_i64 reg_val)
2475 {
2476 TCGv_i32 last = tcg_temp_new_i32();
2477 TCGv_i64 ele, cmp, zero;
2478
2479 find_last_active(s, last, esz, pg);
2480
2481 /* Extend the original value of last prior to incrementing. */
2482 cmp = tcg_temp_new_i64();
2483 tcg_gen_ext_i32_i64(cmp, last);
2484
2485 if (!before) {
2486 incr_last_active(s, last, esz);
2487 }
2488
2489 /* The conceit here is that while last < 0 indicates not found, after
2490 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2491 * from which we can load garbage. We then discard the garbage with
2492 * a conditional move.
2493 */
2494 ele = load_last_active(s, last, rm, esz);
2495 tcg_temp_free_i32(last);
2496
2497 zero = tcg_const_i64(0);
2498 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2499
2500 tcg_temp_free_i64(zero);
2501 tcg_temp_free_i64(cmp);
2502 tcg_temp_free_i64(ele);
2503 }
2504
2505 /* Compute CLAST for a Vreg. */
2506 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2507 {
2508 if (sve_access_check(s)) {
2509 int esz = a->esz;
2510 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2511 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2512
2513 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2514 write_fp_dreg(s, a->rd, reg);
2515 tcg_temp_free_i64(reg);
2516 }
2517 return true;
2518 }
2519
2520 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2521 {
2522 return do_clast_fp(s, a, false);
2523 }
2524
2525 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2526 {
2527 return do_clast_fp(s, a, true);
2528 }
2529
2530 /* Compute CLAST for a Xreg. */
2531 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2532 {
2533 TCGv_i64 reg;
2534
2535 if (!sve_access_check(s)) {
2536 return true;
2537 }
2538
2539 reg = cpu_reg(s, a->rd);
2540 switch (a->esz) {
2541 case 0:
2542 tcg_gen_ext8u_i64(reg, reg);
2543 break;
2544 case 1:
2545 tcg_gen_ext16u_i64(reg, reg);
2546 break;
2547 case 2:
2548 tcg_gen_ext32u_i64(reg, reg);
2549 break;
2550 case 3:
2551 break;
2552 default:
2553 g_assert_not_reached();
2554 }
2555
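/*
 * The narrowing above implements the architectural no-active-element
 * case: the prior Xreg value is retained, but zero-extended from the
 * element size.
 */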
2556 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2557 return true;
2558 }
2559
2560 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2561 {
2562 return do_clast_general(s, a, false);
2563 }
2564
2565 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2566 {
2567 return do_clast_general(s, a, true);
2568 }
2569
2570 /* Compute LAST for a scalar. */
2571 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2572 int pg, int rm, bool before)
2573 {
2574 TCGv_i32 last = tcg_temp_new_i32();
2575 TCGv_i64 ret;
2576
2577 find_last_active(s, last, esz, pg);
2578 if (before) {
2579 wrap_last_active(s, last, esz);
2580 } else {
2581 incr_last_active(s, last, esz);
2582 }
2583
2584 ret = load_last_active(s, last, rm, esz);
2585 tcg_temp_free_i32(last);
2586 return ret;
2587 }
2588
2589 /* Compute LAST for a Vreg. */
2590 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2591 {
2592 if (sve_access_check(s)) {
2593 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2594 write_fp_dreg(s, a->rd, val);
2595 tcg_temp_free_i64(val);
2596 }
2597 return true;
2598 }
2599
2600 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2601 {
2602 return do_last_fp(s, a, false);
2603 }
2604
2605 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2606 {
2607 return do_last_fp(s, a, true);
2608 }
2609
2610 /* Compute LAST for a Xreg. */
2611 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2612 {
2613 if (sve_access_check(s)) {
2614 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2615 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2616 tcg_temp_free_i64(val);
2617 }
2618 return true;
2619 }
2620
2621 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2622 {
2623 return do_last_general(s, a, false);
2624 }
2625
2626 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2627 {
2628 return do_last_general(s, a, true);
2629 }
2630
2631 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2632 {
2633 if (sve_access_check(s)) {
2634 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2635 }
2636 return true;
2637 }
2638
2639 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2640 {
2641 if (sve_access_check(s)) {
2642 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2643 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2644 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2645 tcg_temp_free_i64(t);
2646 }
2647 return true;
2648 }
2649
2650 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2651 {
2652 static gen_helper_gvec_3 * const fns[4] = {
2653 NULL,
2654 gen_helper_sve_revb_h,
2655 gen_helper_sve_revb_s,
2656 gen_helper_sve_revb_d,
2657 };
2658 return do_zpz_ool(s, a, fns[a->esz]);
2659 }
2660
2661 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2662 {
2663 static gen_helper_gvec_3 * const fns[4] = {
2664 NULL,
2665 NULL,
2666 gen_helper_sve_revh_s,
2667 gen_helper_sve_revh_d,
2668 };
2669 return do_zpz_ool(s, a, fns[a->esz]);
2670 }
2671
2672 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2673 {
2674 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2675 }
2676
2677 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2678 {
2679 static gen_helper_gvec_3 * const fns[4] = {
2680 gen_helper_sve_rbit_b,
2681 gen_helper_sve_rbit_h,
2682 gen_helper_sve_rbit_s,
2683 gen_helper_sve_rbit_d,
2684 };
2685 return do_zpz_ool(s, a, fns[a->esz]);
2686 }
2687
2688 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2689 {
2690 if (sve_access_check(s)) {
2691 gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
2692 a->rd, a->rn, a->rm, a->pg, 0);
2693 }
2694 return true;
2695 }
2696
2697 /*
2698 *** SVE Integer Compare - Vectors Group
2699 */
2700
2701 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2702 gen_helper_gvec_flags_4 *gen_fn)
2703 {
2704 TCGv_ptr pd, zn, zm, pg;
2705 unsigned vsz;
2706 TCGv_i32 t;
2707
2708 if (gen_fn == NULL) {
2709 return false;
2710 }
2711 if (!sve_access_check(s)) {
2712 return true;
2713 }
2714
2715 vsz = vec_full_reg_size(s);
2716 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2717 pd = tcg_temp_new_ptr();
2718 zn = tcg_temp_new_ptr();
2719 zm = tcg_temp_new_ptr();
2720 pg = tcg_temp_new_ptr();
2721
2722 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2723 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2724 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2725 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2726
2727 gen_fn(t, pd, zn, zm, pg, t);
2728
2729 tcg_temp_free_ptr(pd);
2730 tcg_temp_free_ptr(zn);
2731 tcg_temp_free_ptr(zm);
2732 tcg_temp_free_ptr(pg);
2733
2734 do_pred_flags(t);
2735
2736 tcg_temp_free_i32(t);
2737 return true;
2738 }
2739
2740 #define DO_PPZZ(NAME, name) \
2741 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \
2742 { \
2743 static gen_helper_gvec_flags_4 * const fns[4] = { \
2744 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2745 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2746 }; \
2747 return do_ppzz_flags(s, a, fns[a->esz]); \
2748 }
2749
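/*
 * For instance, DO_PPZZ(CMPEQ, cmpeq) below expands to a
 * trans_CMPEQ_ppzz function that dispatches on element size to
 * gen_helper_sve_cmpeq_ppzz_{b,h,s,d}.
 */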
2750 DO_PPZZ(CMPEQ, cmpeq)
2751 DO_PPZZ(CMPNE, cmpne)
2752 DO_PPZZ(CMPGT, cmpgt)
2753 DO_PPZZ(CMPGE, cmpge)
2754 DO_PPZZ(CMPHI, cmphi)
2755 DO_PPZZ(CMPHS, cmphs)
2756
2757 #undef DO_PPZZ
2758
2759 #define DO_PPZW(NAME, name) \
2760 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a) \
2761 { \
2762 static gen_helper_gvec_flags_4 * const fns[4] = { \
2763 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2764 gen_helper_sve_##name##_ppzw_s, NULL \
2765 }; \
2766 return do_ppzz_flags(s, a, fns[a->esz]); \
2767 }
2768
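/*
 * The wide forms compare each element against a 64-bit element of
 * the second operand, hence no doubleword entry above. Since the
 * wide operand cannot be swapped to synthesize the "less" forms,
 * CMPLT/CMPLE/CMPLO/CMPLS need their own entries here, unlike the
 * vector forms above.
 */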
2769 DO_PPZW(CMPEQ, cmpeq)
2770 DO_PPZW(CMPNE, cmpne)
2771 DO_PPZW(CMPGT, cmpgt)
2772 DO_PPZW(CMPGE, cmpge)
2773 DO_PPZW(CMPHI, cmphi)
2774 DO_PPZW(CMPHS, cmphs)
2775 DO_PPZW(CMPLT, cmplt)
2776 DO_PPZW(CMPLE, cmple)
2777 DO_PPZW(CMPLO, cmplo)
2778 DO_PPZW(CMPLS, cmpls)
2779
2780 #undef DO_PPZW
2781
2782 /*
2783 *** SVE Integer Compare - Immediate Groups
2784 */
2785
2786 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2787 gen_helper_gvec_flags_3 *gen_fn)
2788 {
2789 TCGv_ptr pd, zn, pg;
2790 unsigned vsz;
2791 TCGv_i32 t;
2792
2793 if (gen_fn == NULL) {
2794 return false;
2795 }
2796 if (!sve_access_check(s)) {
2797 return true;
2798 }
2799
2800 vsz = vec_full_reg_size(s);
2801 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2802 pd = tcg_temp_new_ptr();
2803 zn = tcg_temp_new_ptr();
2804 pg = tcg_temp_new_ptr();
2805
2806 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2807 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2808 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2809
2810 gen_fn(t, pd, zn, pg, t);
2811
2812 tcg_temp_free_ptr(pd);
2813 tcg_temp_free_ptr(zn);
2814 tcg_temp_free_ptr(pg);
2815
2816 do_pred_flags(t);
2817
2818 tcg_temp_free_i32(t);
2819 return true;
2820 }
2821
2822 #define DO_PPZI(NAME, name) \
2823 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a) \
2824 { \
2825 static gen_helper_gvec_flags_3 * const fns[4] = { \
2826 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2827 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2828 }; \
2829 return do_ppzi_flags(s, a, fns[a->esz]); \
2830 }
2831
2832 DO_PPZI(CMPEQ, cmpeq)
2833 DO_PPZI(CMPNE, cmpne)
2834 DO_PPZI(CMPGT, cmpgt)
2835 DO_PPZI(CMPGE, cmpge)
2836 DO_PPZI(CMPHI, cmphi)
2837 DO_PPZI(CMPHS, cmphs)
2838 DO_PPZI(CMPLT, cmplt)
2839 DO_PPZI(CMPLE, cmple)
2840 DO_PPZI(CMPLO, cmplo)
2841 DO_PPZI(CMPLS, cmpls)
2842
2843 #undef DO_PPZI
2844
2845 /*
2846 *** SVE Partition Break Group
2847 */
2848
2849 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2850 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2851 {
2852 if (!sve_access_check(s)) {
2853 return true;
2854 }
2855
2856 unsigned vsz = pred_full_reg_size(s);
2857
2858 /* Predicate sizes may be smaller and cannot use simd_desc. */
2859 TCGv_ptr d = tcg_temp_new_ptr();
2860 TCGv_ptr n = tcg_temp_new_ptr();
2861 TCGv_ptr m = tcg_temp_new_ptr();
2862 TCGv_ptr g = tcg_temp_new_ptr();
2863 TCGv_i32 t = tcg_const_i32(vsz - 2);
2864
2865 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2866 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2867 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2868 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2869
2870 if (a->s) {
2871 fn_s(t, d, n, m, g, t);
2872 do_pred_flags(t);
2873 } else {
2874 fn(d, n, m, g, t);
2875 }
2876 tcg_temp_free_ptr(d);
2877 tcg_temp_free_ptr(n);
2878 tcg_temp_free_ptr(m);
2879 tcg_temp_free_ptr(g);
2880 tcg_temp_free_i32(t);
2881 return true;
2882 }
2883
2884 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2885 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2886 {
2887 if (!sve_access_check(s)) {
2888 return true;
2889 }
2890
2891 unsigned vsz = pred_full_reg_size(s);
2892
2893 /* Predicate sizes may be smaller and cannot use simd_desc. */
2894 TCGv_ptr d = tcg_temp_new_ptr();
2895 TCGv_ptr n = tcg_temp_new_ptr();
2896 TCGv_ptr g = tcg_temp_new_ptr();
2897 TCGv_i32 t = tcg_const_i32(vsz - 2);
2898
2899 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2900 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2901 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2902
2903 if (a->s) {
2904 fn_s(t, d, n, g, t);
2905 do_pred_flags(t);
2906 } else {
2907 fn(d, n, g, t);
2908 }
2909 tcg_temp_free_ptr(d);
2910 tcg_temp_free_ptr(n);
2911 tcg_temp_free_ptr(g);
2912 tcg_temp_free_i32(t);
2913 return true;
2914 }
2915
2916 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2917 {
2918 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2919 }
2920
2921 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2922 {
2923 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2924 }
2925
2926 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2927 {
2928 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2929 }
2930
2931 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2932 {
2933 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2934 }
2935
2936 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2937 {
2938 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2939 }
2940
2941 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2942 {
2943 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2944 }
2945
2946 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
2947 {
2948 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2949 }
2950
2951 /*
2952 *** SVE Predicate Count Group
2953 */
2954
2955 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2956 {
2957 unsigned psz = pred_full_reg_size(s);
2958
2959 if (psz <= 8) {
2960 uint64_t psz_mask;
2961
2962 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2963 if (pn != pg) {
2964 TCGv_i64 g = tcg_temp_new_i64();
2965 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2966 tcg_gen_and_i64(val, val, g);
2967 tcg_temp_free_i64(g);
2968 }
2969
2970 /* Reduce the pred_esz_masks value simply to reduce the
2971 * size of the code generated here.
2972 */
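/*
 * E.g. (illustrative): with esz = 2 and psz = 4 the mask becomes
 * 0x11111111 -- one bit per 32-bit element -- so the ctpop below
 * counts exactly the active elements.
 */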
2973 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
2974 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
2975
2976 tcg_gen_ctpop_i64(val, val);
2977 } else {
2978 TCGv_ptr t_pn = tcg_temp_new_ptr();
2979 TCGv_ptr t_pg = tcg_temp_new_ptr();
2980 unsigned desc;
2981 TCGv_i32 t_desc;
2982
2983 desc = psz - 2;
2984 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2985
2986 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
2987 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2988 t_desc = tcg_const_i32(desc);
2989
2990 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
2991 tcg_temp_free_ptr(t_pn);
2992 tcg_temp_free_ptr(t_pg);
2993 tcg_temp_free_i32(t_desc);
2994 }
2995 }
2996
2997 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
2998 {
2999 if (sve_access_check(s)) {
3000 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3001 }
3002 return true;
3003 }
3004
3005 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3006 {
3007 if (sve_access_check(s)) {
3008 TCGv_i64 reg = cpu_reg(s, a->rd);
3009 TCGv_i64 val = tcg_temp_new_i64();
3010
3011 do_cntp(s, val, a->esz, a->pg, a->pg);
3012 if (a->d) {
3013 tcg_gen_sub_i64(reg, reg, val);
3014 } else {
3015 tcg_gen_add_i64(reg, reg, val);
3016 }
3017 tcg_temp_free_i64(val);
3018 }
3019 return true;
3020 }
3021
3022 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3023 {
3024 if (a->esz == 0) {
3025 return false;
3026 }
3027 if (sve_access_check(s)) {
3028 unsigned vsz = vec_full_reg_size(s);
3029 TCGv_i64 val = tcg_temp_new_i64();
3030 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3031
3032 do_cntp(s, val, a->esz, a->pg, a->pg);
3033 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3034 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3035 }
3036 return true;
3037 }
3038
3039 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3040 {
3041 if (sve_access_check(s)) {
3042 TCGv_i64 reg = cpu_reg(s, a->rd);
3043 TCGv_i64 val = tcg_temp_new_i64();
3044
3045 do_cntp(s, val, a->esz, a->pg, a->pg);
3046 do_sat_addsub_32(reg, val, a->u, a->d);
3047 }
3048 return true;
3049 }
3050
3051 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3052 {
3053 if (sve_access_check(s)) {
3054 TCGv_i64 reg = cpu_reg(s, a->rd);
3055 TCGv_i64 val = tcg_temp_new_i64();
3056
3057 do_cntp(s, val, a->esz, a->pg, a->pg);
3058 do_sat_addsub_64(reg, val, a->u, a->d);
3059 }
3060 return true;
3061 }
3062
3063 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3064 {
3065 if (a->esz == 0) {
3066 return false;
3067 }
3068 if (sve_access_check(s)) {
3069 TCGv_i64 val = tcg_temp_new_i64();
3070 do_cntp(s, val, a->esz, a->pg, a->pg);
3071 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3072 }
3073 return true;
3074 }
3075
3076 /*
3077 *** SVE Integer Compare Scalars Group
3078 */
3079
3080 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3081 {
3082 if (!sve_access_check(s)) {
3083 return true;
3084 }
3085
3086 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3087 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3088 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3089 TCGv_i64 cmp = tcg_temp_new_i64();
3090
3091 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3092 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3093 tcg_temp_free_i64(cmp);
3094
3095 /* VF = !NF & !CF. */
3096 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3097 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3098
3099 /* Both NF and VF actually look at bit 31. */
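/*
 * The negations convert the 0/1 setcond results to 0/-1, placing
 * the flag value in bit 31 as required.
 */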
3100 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3101 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3102 return true;
3103 }
3104
3105 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3106 {
3107 TCGv_i64 op0, op1, t0, t1, tmax;
3108 TCGv_i32 t2, t3;
3109 TCGv_ptr ptr;
3110 unsigned desc, vsz = vec_full_reg_size(s);
3111 TCGCond cond;
3112
3113 if (!sve_access_check(s)) {
3114 return true;
3115 }
3116
3117 op0 = read_cpu_reg(s, a->rn, 1);
3118 op1 = read_cpu_reg(s, a->rm, 1);
3119
3120 if (!a->sf) {
3121 if (a->u) {
3122 tcg_gen_ext32u_i64(op0, op0);
3123 tcg_gen_ext32u_i64(op1, op1);
3124 } else {
3125 tcg_gen_ext32s_i64(op0, op0);
3126 tcg_gen_ext32s_i64(op1, op1);
3127 }
3128 }
3129
3130 /* For the helper, compress the different conditions into a count
3131 * of the iterations for which the condition is true.
3132 */
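/*
 * E.g. (illustrative): WHILELT with op0 = 5 and op1 = 8 gives
 * t0 = 3 true iterations; WHILELE with the same operands gives 4.
 */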
3133 t0 = tcg_temp_new_i64();
3134 t1 = tcg_temp_new_i64();
3135 tcg_gen_sub_i64(t0, op1, op0);
3136
3137 tmax = tcg_const_i64(vsz >> a->esz);
3138 if (a->eq) {
3139 /* Equality means one more iteration. */
3140 tcg_gen_addi_i64(t0, t0, 1);
3141
3142 /* If op1 is the maximum (un)signed integer (the only case in which the
3143 * addition above can overflow), we produce an all-true predicate by
3144 * setting the count to the vector length. This is because the
3145 * pseudocode is described as an increment + compare loop, and the
3146 * max integer would always compare true.
3147 */
3148 tcg_gen_movi_i64(t1, (a->sf
3149 ? (a->u ? UINT64_MAX : INT64_MAX)
3150 : (a->u ? UINT32_MAX : INT32_MAX)));
3151 tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3152 }
3153
3154 /* Bound to the maximum. */
3155 tcg_gen_umin_i64(t0, t0, tmax);
3156 tcg_temp_free_i64(tmax);
3157
3158 /* Set the count to zero if the condition is false. */
3159 cond = (a->u
3160 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3161 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3162 tcg_gen_movi_i64(t1, 0);
3163 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3164 tcg_temp_free_i64(t1);
3165
3166 /* Since we're bounded, pass as a 32-bit type. */
3167 t2 = tcg_temp_new_i32();
3168 tcg_gen_extrl_i64_i32(t2, t0);
3169 tcg_temp_free_i64(t0);
3170
3171 /* Scale elements to bits. */
3172 tcg_gen_shli_i32(t2, t2, a->esz);
3173
3174 desc = (vsz / 8) - 2;
3175 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3176 t3 = tcg_const_i32(desc);
3177
3178 ptr = tcg_temp_new_ptr();
3179 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3180
3181 gen_helper_sve_while(t2, ptr, t2, t3);
3182 do_pred_flags(t2);
3183
3184 tcg_temp_free_ptr(ptr);
3185 tcg_temp_free_i32(t2);
3186 tcg_temp_free_i32(t3);
3187 return true;
3188 }
3189
3190 /*
3191 *** SVE Integer Wide Immediate - Unpredicated Group
3192 */
3193
3194 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3195 {
3196 if (a->esz == 0) {
3197 return false;
3198 }
3199 if (sve_access_check(s)) {
3200 unsigned vsz = vec_full_reg_size(s);
3201 int dofs = vec_full_reg_offset(s, a->rd);
3202 uint64_t imm;
3203
3204 /* Decode the VFP immediate. */
3205 imm = vfp_expand_imm(a->esz, a->imm);
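/*
 * E.g. (illustrative): for esz = 3, imm = 0x70 expands to the
 * double-precision constant 1.0 (0x3ff0000000000000).
 */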
3206 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3207 }
3208 return true;
3209 }
3210
3211 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3212 {
3213 if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3214 return false;
3215 }
3216 if (sve_access_check(s)) {
3217 unsigned vsz = vec_full_reg_size(s);
3218 int dofs = vec_full_reg_offset(s, a->rd);
3219
3220 tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3221 }
3222 return true;
3223 }
3224
3225 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3226 {
3227 if (a->esz == 0 && extract32(s->insn, 13,