target/riscv: add vector stride load and store instructions
target/riscv/insn_trans/trans_rvv.inc.c
/*
 * RISC-V translation routines for the RVV Standard Extension.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"

static bool trans_vsetvl(DisasContext *ctx, arg_vsetvl *a)
{
    TCGv s1, s2, dst;

    if (!has_ext(ctx, RVV)) {
        return false;
    }

    s2 = tcg_temp_new();
    dst = tcg_temp_new();

    /* Using x0 as the rs1 register specifier encodes an infinite AVL */
    if (a->rs1 == 0) {
        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
        s1 = tcg_const_tl(RV_VLEN_MAX);
    } else {
        s1 = tcg_temp_new();
        gen_get_gpr(s1, a->rs1);
    }
    gen_get_gpr(s2, a->rs2);
    gen_helper_vsetvl(dst, cpu_env, s1, s2);
    gen_set_gpr(a->rd, dst);
    tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
    lookup_and_goto_ptr(ctx);
    ctx->base.is_jmp = DISAS_NORETURN;

    tcg_temp_free(s1);
    tcg_temp_free(s2);
    tcg_temp_free(dst);
    return true;
}

static bool trans_vsetvli(DisasContext *ctx, arg_vsetvli *a)
{
    TCGv s1, s2, dst;

    if (!has_ext(ctx, RVV)) {
        return false;
    }

    s2 = tcg_const_tl(a->zimm);
    dst = tcg_temp_new();

    /* Using x0 as the rs1 register specifier encodes an infinite AVL */
    if (a->rs1 == 0) {
        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
        s1 = tcg_const_tl(RV_VLEN_MAX);
    } else {
        s1 = tcg_temp_new();
        gen_get_gpr(s1, a->rs1);
    }
    gen_helper_vsetvl(dst, cpu_env, s1, s2);
    gen_set_gpr(a->rd, dst);
    gen_goto_tb(ctx, 0, ctx->pc_succ_insn);
    ctx->base.is_jmp = DISAS_NORETURN;

    tcg_temp_free(s1);
    tcg_temp_free(s2);
    tcg_temp_free(dst);
    return true;
}

/* vector register offset from env */
static uint32_t vreg_ofs(DisasContext *s, int reg)
{
    return offsetof(CPURISCVState, vreg) + reg * s->vlen / 8;
}
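
/*
 * Worked example (illustrative, not part of the original patch): with
 * s->vlen = 128 bits, each vector register occupies 128 / 8 = 16 bytes,
 * so v2 starts at offsetof(CPURISCVState, vreg) + 2 * 16 bytes.
 */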

/* check functions */

/*
 * cpu_get_tb_cpu_state() sets VILL when RVV is not present, so checking
 * vill here also covers the check for the RVV extension.
 */
static bool vext_check_isa_ill(DisasContext *s)
{
    return !s->vill;
}

/*
 * Two rules are checked here:
 *
 * 1. Vector register numbers are multiples of LMUL. (Section 3.2)
 *
 * 2. For all widening instructions, the destination LMUL value must also be
 *    a supported LMUL value. (Section 11.2)
 */
static bool vext_check_reg(DisasContext *s, uint32_t reg, bool widen)
{
    /*
     * The destination vector register group results are arranged as if both
     * SEW and LMUL were at twice their current settings. (Section 11.2).
     */
    int legal = widen ? 2 << s->lmul : 1 << s->lmul;

    return !((s->lmul == 0x3 && widen) || (reg % legal));
}
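
/*
 * Worked example (illustrative, not part of the original patch): with
 * LMUL = 4 (s->lmul == 2), legal is 1 << 2 = 4, so only v0, v4, v8, ...
 * are valid group bases.  For a widening op the destination group doubles
 * (legal = 2 << 2 = 8), and LMUL = 8 (s->lmul == 0x3) is rejected outright
 * because no wider destination group exists.
 */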

/*
 * Two rules are checked here:
 *
 * 1. The destination vector register group for a masked vector instruction can
 *    only overlap the source mask register (v0) when LMUL=1. (Section 5.3)
 *
 * 2. For widening instructions and some other instructions, such as
 *    vslideup.vx, there is no need to check whether LMUL=1.
 */
static bool vext_check_overlap_mask(DisasContext *s, uint32_t vd, bool vm,
                                    bool force)
{
    return (vm != 0 || vd != 0) || (!force && (s->lmul == 0));
}
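
/*
 * Illustrative cases (not part of the original patch): an unmasked op
 * (vm == 1) or one with vd != 0 always passes; a masked op writing v0
 * passes only when the check is not forced and LMUL = 1 (s->lmul == 0).
 */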

/* The LMUL setting must be such that LMUL * NFIELDS <= 8. (Section 7.8) */
static bool vext_check_nf(DisasContext *s, uint32_t nf)
{
    return (1 << s->lmul) * nf <= 8;
}
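
/*
 * Worked example (illustrative, not part of the original patch): with
 * LMUL = 2 (s->lmul == 1), a four-field segment access gives
 * (1 << 1) * 4 = 8 <= 8 and is accepted, while nf = 5 gives 10 > 8 and
 * is rejected.
 */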

/* common translation macro */
#define GEN_VEXT_TRANS(NAME, SEQ, ARGTYPE, OP, CHECK)      \
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE *a)\
{                                                          \
    if (CHECK(s, a)) {                                     \
        return OP(s, a, SEQ);                              \
    }                                                      \
    return false;                                          \
}
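
/*
 * Expansion sketch (illustrative; arg_r2nfvm is declared by the generated
 * decoder): GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check)
 * expands to
 *
 *   static bool trans_vlb_v(DisasContext *s, arg_r2nfvm *a)
 *   {
 *       if (ld_us_check(s, a)) {
 *           return ld_us_op(s, a, 0);
 *       }
 *       return false;
 *   }
 */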

/*
 *** unit stride load and store
 */
typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
                                TCGv_env, TCGv_i32);

static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                          gen_helper_ldst_us *fn, DisasContext *s)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = tcg_temp_new();

    /*
     * As simd_desc supports at most 256 bytes, while in this implementation
     * the max vector group length is 2048 bytes, the information is split
     * into two parts:
     *
     * The first part is vlen in bytes, encoded in maxsz of simd_desc.
     * The second part is lmul, encoded in data of simd_desc.
     */
    desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));

    gen_get_gpr(base, rs1);
    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free(base);
    tcg_temp_free_i32(desc);
    gen_set_label(over);
    return true;
}
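
/*
 * Decode sketch (assumption: the helpers in vector_helper.c unpack the
 * descriptor roughly like this; simd_maxsz()/simd_data() are the standard
 * tcg-gvec-desc.h accessors, the field names are the VDATA fields packed
 * above):
 *
 *   uint32_t vlenb = simd_maxsz(desc);                         // vlen / 8
 *   uint32_t nf    = FIELD_EX32(simd_data(desc), VDATA, NF);
 *   uint32_t vm    = FIELD_EX32(simd_data(desc), VDATA, VM);
 *   uint32_t lmul  = FIELD_EX32(simd_data(desc), VDATA, LMUL);
 */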

static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][7][4] = {
        /* masked unit stride load */
        { { gen_helper_vlb_v_b_mask,  gen_helper_vlb_v_h_mask,
            gen_helper_vlb_v_w_mask,  gen_helper_vlb_v_d_mask },
          { NULL,                     gen_helper_vlh_v_h_mask,
            gen_helper_vlh_v_w_mask,  gen_helper_vlh_v_d_mask },
          { NULL,                     NULL,
            gen_helper_vlw_v_w_mask,  gen_helper_vlw_v_d_mask },
          { gen_helper_vle_v_b_mask,  gen_helper_vle_v_h_mask,
            gen_helper_vle_v_w_mask,  gen_helper_vle_v_d_mask },
          { gen_helper_vlbu_v_b_mask, gen_helper_vlbu_v_h_mask,
            gen_helper_vlbu_v_w_mask, gen_helper_vlbu_v_d_mask },
          { NULL,                     gen_helper_vlhu_v_h_mask,
            gen_helper_vlhu_v_w_mask, gen_helper_vlhu_v_d_mask },
          { NULL,                     NULL,
            gen_helper_vlwu_v_w_mask, gen_helper_vlwu_v_d_mask } },
        /* unmasked unit stride load */
        { { gen_helper_vlb_v_b,  gen_helper_vlb_v_h,
            gen_helper_vlb_v_w,  gen_helper_vlb_v_d },
          { NULL,                gen_helper_vlh_v_h,
            gen_helper_vlh_v_w,  gen_helper_vlh_v_d },
          { NULL,                NULL,
            gen_helper_vlw_v_w,  gen_helper_vlw_v_d },
          { gen_helper_vle_v_b,  gen_helper_vle_v_h,
            gen_helper_vle_v_w,  gen_helper_vle_v_d },
          { gen_helper_vlbu_v_b, gen_helper_vlbu_v_h,
            gen_helper_vlbu_v_w, gen_helper_vlbu_v_d },
          { NULL,                gen_helper_vlhu_v_h,
            gen_helper_vlhu_v_w, gen_helper_vlhu_v_d },
          { NULL,                NULL,
            gen_helper_vlwu_v_w, gen_helper_vlwu_v_d } }
    };

    fn = fns[a->vm][seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s);
}

static bool ld_us_check(DisasContext *s, arg_r2nfvm* a)
{
    return (vext_check_isa_ill(s) &&
            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
            vext_check_reg(s, a->rd, false) &&
            vext_check_nf(s, a->nf));
}

GEN_VEXT_TRANS(vlb_v, 0, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vlh_v, 1, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vlw_v, 2, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle_v, 3, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vlbu_v, 4, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vlhu_v, 5, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vlwu_v, 6, r2nfvm, ld_us_op, ld_us_check)

static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][4][4] = {
        /* masked unit stride store */
        { { gen_helper_vsb_v_b_mask,  gen_helper_vsb_v_h_mask,
            gen_helper_vsb_v_w_mask,  gen_helper_vsb_v_d_mask },
          { NULL,                     gen_helper_vsh_v_h_mask,
            gen_helper_vsh_v_w_mask,  gen_helper_vsh_v_d_mask },
          { NULL,                     NULL,
            gen_helper_vsw_v_w_mask,  gen_helper_vsw_v_d_mask },
          { gen_helper_vse_v_b_mask,  gen_helper_vse_v_h_mask,
            gen_helper_vse_v_w_mask,  gen_helper_vse_v_d_mask } },
        /* unmasked unit stride store */
        { { gen_helper_vsb_v_b,  gen_helper_vsb_v_h,
            gen_helper_vsb_v_w,  gen_helper_vsb_v_d },
          { NULL,                gen_helper_vsh_v_h,
            gen_helper_vsh_v_w,  gen_helper_vsh_v_d },
          { NULL,                NULL,
            gen_helper_vsw_v_w,  gen_helper_vsw_v_d },
          { gen_helper_vse_v_b,  gen_helper_vse_v_h,
            gen_helper_vse_v_w,  gen_helper_vse_v_d } }
    };

    fn = fns[a->vm][seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s);
}

static bool st_us_check(DisasContext *s, arg_r2nfvm* a)
{
    return (vext_check_isa_ill(s) &&
            vext_check_reg(s, a->rd, false) &&
            vext_check_nf(s, a->nf));
}

GEN_VEXT_TRANS(vsb_v, 0, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vsh_v, 1, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vsw_v, 2, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse_v, 3, r2nfvm, st_us_op, st_us_check)

/*
 *** stride load and store
 */
typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv,
                                    TCGv, TCGv_env, TCGv_i32);

static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
                              uint32_t data, gen_helper_ldst_stride *fn,
                              DisasContext *s)
{
    TCGv_ptr dest, mask;
    TCGv base, stride;
    TCGv_i32 desc;

    TCGLabel *over = gen_new_label();
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = tcg_temp_new();
    stride = tcg_temp_new();
    desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));

    gen_get_gpr(base, rs1);
    gen_get_gpr(stride, rs2);
    tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, cpu_env, vreg_ofs(s, 0));

    fn(dest, mask, base, stride, cpu_env, desc);

    tcg_temp_free_ptr(dest);
    tcg_temp_free_ptr(mask);
    tcg_temp_free(base);
    tcg_temp_free(stride);
    tcg_temp_free_i32(desc);
    gen_set_label(over);
    return true;
}
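
/*
 * Semantics sketch (illustrative pseudo-code, not the actual helper): for
 * a strided access of nf fields with element size esz bytes, active
 * element i touches memory at
 *
 *   base + i * stride + k * esz,   for k in [0, nf)
 *
 * and, when vm == 0, elements whose mask bit in v0 is clear are skipped.
 */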

static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_ldst_stride *fn;
    static gen_helper_ldst_stride * const fns[7][4] = {
        { gen_helper_vlsb_v_b,  gen_helper_vlsb_v_h,
          gen_helper_vlsb_v_w,  gen_helper_vlsb_v_d },
        { NULL,                 gen_helper_vlsh_v_h,
          gen_helper_vlsh_v_w,  gen_helper_vlsh_v_d },
        { NULL,                 NULL,
          gen_helper_vlsw_v_w,  gen_helper_vlsw_v_d },
        { gen_helper_vlse_v_b,  gen_helper_vlse_v_h,
          gen_helper_vlse_v_w,  gen_helper_vlse_v_d },
        { gen_helper_vlsbu_v_b, gen_helper_vlsbu_v_h,
          gen_helper_vlsbu_v_w, gen_helper_vlsbu_v_d },
        { NULL,                 gen_helper_vlshu_v_h,
          gen_helper_vlshu_v_w, gen_helper_vlshu_v_d },
        { NULL,                 NULL,
          gen_helper_vlswu_v_w, gen_helper_vlswu_v_d },
    };

    fn = fns[seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool ld_stride_check(DisasContext *s, arg_rnfvm* a)
{
    return (vext_check_isa_ill(s) &&
            vext_check_overlap_mask(s, a->rd, a->vm, false) &&
            vext_check_reg(s, a->rd, false) &&
            vext_check_nf(s, a->nf));
}

GEN_VEXT_TRANS(vlsb_v, 0, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlsh_v, 1, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlsw_v, 2, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse_v, 3, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlsbu_v, 4, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlshu_v, 5, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlswu_v, 6, rnfvm, ld_stride_op, ld_stride_check)

static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_ldst_stride *fn;
    static gen_helper_ldst_stride * const fns[4][4] = {
        /* masked stride store */
        { gen_helper_vssb_v_b,  gen_helper_vssb_v_h,
          gen_helper_vssb_v_w,  gen_helper_vssb_v_d },
        { NULL,                 gen_helper_vssh_v_h,
          gen_helper_vssh_v_w,  gen_helper_vssh_v_d },
        { NULL,                 NULL,
          gen_helper_vssw_v_w,  gen_helper_vssw_v_d },
        { gen_helper_vsse_v_b,  gen_helper_vsse_v_h,
          gen_helper_vsse_v_w,  gen_helper_vsse_v_d }
    };

    data = FIELD_DP32(data, VDATA, MLEN, s->mlen);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    fn = fns[seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool st_stride_check(DisasContext *s, arg_rnfvm* a)
{
    return (vext_check_isa_ill(s) &&
            vext_check_reg(s, a->rd, false) &&
            vext_check_nf(s, a->nf));
}

GEN_VEXT_TRANS(vssb_v, 0, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vssh_v, 1, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vssw_v, 2, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse_v, 3, rnfvm, st_stride_op, st_stride_check)
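
/*
 * Usage example (illustrative assembly, not part of this patch; syntax as
 * in the v0.7.1 draft spec this code targets):
 *
 *   vsetvli t0, a2, e8        # SEW = 8
 *   vlsb.v  v0, (a0), a1      # load bytes from a0, a0 + a1, a0 + 2*a1, ...
 *   vssb.v  v0, (a0), a1      # store them back with the same stride
 */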