/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "trace-tcg.h"
#include "translate-a64.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

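/* E.g. a tszimm field of tsz:imm3 = 0b0100:000 yields esz = 2, i.e.
 * 32-bit elements: the position of the most significant set bit of
 * tsz selects the element size, while tsz == 0 yields -1.
 */
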
/* See e.g. LSR (immediate, predicated). */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated). */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift. */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}

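/* E.g. expand_imm_sh8s(0x1ff): the low byte 0xff sign-extends to -1 and
 * the set SH bit shifts it up to -256; expand_imm_sh8u(0x1ff) likewise
 * yields 0xff00.
 */
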
/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}

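/* E.g. msz == 1 (halfword accesses) maps to dtype 5, the unsigned
 * halfword-to-halfword load form.
 */
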
/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

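/* E.g. with a 384-bit vector the predicate registers hold 384 / 64 = 6
 * bytes, so pred_full_reg_size is 6 and pred_gvec_reg_size rounds to 8.
 */
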
/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

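/* QEMU keeps NZCV unpacked: N is the sign bit of cpu_NF, Z is set iff
 * cpu_ZF is zero, C is bit 0 of cpu_CF, and V is the sign bit of cpu_VF.
 * The SVE helpers therefore return a word with N in bit 31, a not-Z
 * indication in bit 1, and C in bit 0.
 */
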
/* Subroutines computing the ARM PredTest pseudofunction. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

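/* Note that T carries the word count into the helper and receives the
 * packed flag result back in the same temporary.
 */
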
/* For each element size, the bits within a predicate word that are active. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};

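/* E.g. for esz == 2 (word elements) only every fourth predicate bit is
 * significant, hence the 0x1111...1 mask.
 */
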
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    }
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

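/* E.g. DO_ZPZZ(SMULH, smulh) below defines trans_SMULH_zpzz, which
 * dispatches on a->esz to gen_helper_sve_smulh_zpzz_{b,h,s,d}.
 */
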
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_3 * const fns[4] = {                          \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_zpz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(NOT_zpz, not_zpz)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                          \
    static gen_helper_gvec_5 * const fns[4] = {                            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,              \
    };                                                                     \
    return do_zpzzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

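/* SEL computes pd = (pn & pg) | (pm & ~pg): active lanes take pn and
 * inactive lanes take pm.  Note there is no flag-setting form of SEL.
 */
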
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount. */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:  /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}

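/* E.g. with a 384-bit vector and esz == 0 there are 48 elements:
 * POW2 yields 32, MUL4 yields 48, and VL64 yields 0 since 48 < 64.
 */
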
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many. */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }

    tcg_gen_movi_i64(t, 0);
    for (; i < fullsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements. */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements. */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}

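/* E.g. for an unsigned increment, a 64-bit sum greater than UINT32_MAX
 * means the 32-bit value overflowed, so the movcond above replaces the
 * result with the saturation bound.
 */
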
/* Similarly with 64-bit values. */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);
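            /* t0 is now negative (bit 63 set) iff the operands' signs
             * differed and the result's sign differs from the minuend's,
             * i.e. iff the subtraction overflowed.
             */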

            /* Bound the result. */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition. */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result. */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand. */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate. */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency. */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency. */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/*
 *** SVE Permute Vector - Predicated Group
 */

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
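
/* Illustrative example (not from the original source): RET is the byte
 * offset of the last active element, i.e. element index * (1 << esz).
 * With esz = 2 and the last active element at index 3, RET = 12; with
 * no elements active, RET = -(1 << esz) = -4, so the sign of the
 * "not found" indication survives the scaling.
 */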
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
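
/* Illustrative example (not from the original source): with vsz = 32
 * (a power of 2), the AND with 31 wraps offset 32 back to 0; with a
 * non-power-of-2 size such as vsz = 48, the movcond instead selects 0
 * whenever last >= 48.
 */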
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
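
/* Illustrative example (not from the original source): on a big-endian
 * host, elements are stored reversed within each 8-byte chunk, so a
 * byte at little-endian offset 5 within its chunk lives at host offset
 * 5 ^ 7 = 2; XOR with 8 - (1 << esz) performs exactly that adjustment
 * for every element size below 8 bytes.
 */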
/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}
/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
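
/* Illustrative note (not from the original source): when no element is
 * active, CMP holds the negative "not found" offset, so the movcond
 * (cmp >= 0 ? ele : reg_val) discards the garbage element that was
 * loaded and leaves REG_VAL unchanged.
 */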
/* Compute CLAST for a Vreg.  */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
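
/* Illustrative note (not from the original source): for LASTB
 * (before == true) a "not found" result of -(1 << esz) is wrapped to
 * the offset of the very last element, while for LASTA
 * (before == false) the offset is advanced to the element after the
 * last active one, wrapping to 0 at the end of the vector.
 */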
/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}
/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}
/*
 *** SVE Integer Compare - Vectors Group
 */

static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
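
/* Illustrative note (not from the original source): the single temp T
 * does double duty here, carrying the simd_desc descriptor into the
 * helper as the final argument and receiving the computed NZCV flags
 * back as the return value, which do_pred_flags then unpacks into the
 * flag variables.
 */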
#define DO_PPZZ(NAME, name)                                               \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                    \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,  \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,  \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
#define DO_PPZW(NAME, name)                                               \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                    \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,  \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
/*
 *** SVE Integer Compare - Immediate Groups
 */

static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
#define DO_PPZI(NAME, name)                                               \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                    \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,  \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,  \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
/*
 *** SVE Partition Break Group
 */

static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(t, d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(t, d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
/*
 *** SVE Predicate Count Group
 */

static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
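
/* Illustrative example (not from the original source): with VL = 512
 * bits the whole predicate is 8 bytes, so the fast path loads it as a
 * single i64.  For esz = 2, the pred_esz_masks entry keeps only the
 * low predicate bit of each 4-byte element (one bit every 4), and
 * ctpop then yields the number of active elements directly.
 */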
static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
                            uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                             uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
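
/* Illustrative example (not from the original source): if the
 * comparison is true, cmp = 1, so after negation NF = 0xffffffff
 * (N set) and VF = (1 ^ 1) & ~CF = 0 (V clear); if false, N is clear
 * and V ends up as the complement of C in bit 31.
 */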
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
     * aren't that large, so any value >= predicate size is sufficient.