migration: increase max-bandwidth to 128 MiB/s (1 Gib/s)
[qemu.git] / target / s390x / vec_helper.c
1 /*
2 * QEMU TCG support -- s390x vector support instructions
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "cpu.h"
14 #include "internal.h"
15 #include "vec.h"
16 #include "tcg/tcg.h"
17 #include "tcg/tcg-gvec-desc.h"
18 #include "exec/helper-proto.h"
19 #include "exec/cpu_ldst.h"
20 #include "exec/exec-all.h"
21
22 void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
23 {
24 if (likely(bytes >= 16)) {
25 uint64_t t0, t1;
26
27 t0 = cpu_ldq_data_ra(env, addr, GETPC());
28 addr = wrap_address(env, addr + 8);
29 t1 = cpu_ldq_data_ra(env, addr, GETPC());
30 s390_vec_write_element64(v1, 0, t0);
31 s390_vec_write_element64(v1, 1, t1);
32 } else {
33 S390Vector tmp = {};
34 int i;
35
36 for (i = 0; i < bytes; i++) {
37 uint8_t byte = cpu_ldub_data_ra(env, addr, GETPC());
38
39 s390_vec_write_element8(&tmp, i, byte);
40 addr = wrap_address(env, addr + 1);
41 }
42 *(S390Vector *)v1 = tmp;
43 }
44 }
45
46 #define DEF_VPK_HFN(BITS, TBITS) \
47 typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *); \
48 static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2, \
49 const S390Vector *v3, vpk##BITS##_fn fn) \
50 { \
51 int i, saturated = 0; \
52 S390Vector tmp; \
53 \
54 for (i = 0; i < (128 / TBITS); i++) { \
55 uint##BITS##_t src; \
56 \
57 if (i < (128 / BITS)) { \
58 src = s390_vec_read_element##BITS(v2, i); \
59 } else { \
60 src = s390_vec_read_element##BITS(v3, i - (128 / BITS)); \
61 } \
62 s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated)); \
63 } \
64 *v1 = tmp; \
65 return saturated; \
66 }
67 DEF_VPK_HFN(64, 32)
68 DEF_VPK_HFN(32, 16)
69 DEF_VPK_HFN(16, 8)
70
71 #define DEF_VPK(BITS, TBITS) \
72 static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated) \
73 { \
74 return src; \
75 } \
76 void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3, \
77 uint32_t desc) \
78 { \
79 vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e); \
80 }
81 DEF_VPK(64, 32)
82 DEF_VPK(32, 16)
83 DEF_VPK(16, 8)
84
85 #define DEF_VPKS(BITS, TBITS) \
86 static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated) \
87 { \
88 if ((int##BITS##_t)src > INT##TBITS##_MAX) { \
89 (*saturated)++; \
90 return INT##TBITS##_MAX; \
91 } else if ((int##BITS##_t)src < INT##TBITS##_MIN) { \
92 (*saturated)++; \
93 return INT##TBITS##_MIN; \
94 } \
95 return src; \
96 } \
97 void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3, \
98 uint32_t desc) \
99 { \
100 vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \
101 } \
102 void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3, \
103 CPUS390XState *env, uint32_t desc) \
104 { \
105 int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \
106 \
107 if (saturated == (128 / TBITS)) { \
108 env->cc_op = 3; \
109 } else if (saturated) { \
110 env->cc_op = 1; \
111 } else { \
112 env->cc_op = 0; \
113 } \
114 }
115 DEF_VPKS(64, 32)
116 DEF_VPKS(32, 16)
117 DEF_VPKS(16, 8)
118
119 #define DEF_VPKLS(BITS, TBITS) \
120 static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated) \
121 { \
122 if (src > UINT##TBITS##_MAX) { \
123 (*saturated)++; \
124 return UINT##TBITS##_MAX; \
125 } \
126 return src; \
127 } \
128 void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3, \
129 uint32_t desc) \
130 { \
131 vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \
132 } \
133 void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3, \
134 CPUS390XState *env, uint32_t desc) \
135 { \
136 int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \
137 \
138 if (saturated == (128 / TBITS)) { \
139 env->cc_op = 3; \
140 } else if (saturated) { \
141 env->cc_op = 1; \
142 } else { \
143 env->cc_op = 0; \
144 } \
145 }
146 DEF_VPKLS(64, 32)
147 DEF_VPKLS(32, 16)
148 DEF_VPKLS(16, 8)
149
150 void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
151 const void *v4, uint32_t desc)
152 {
153 S390Vector tmp;
154 int i;
155
156 for (i = 0; i < 16; i++) {
157 const uint8_t selector = s390_vec_read_element8(v4, i) & 0x1f;
158 uint8_t byte;
159
160 if (selector < 16) {
161 byte = s390_vec_read_element8(v2, selector);
162 } else {
163 byte = s390_vec_read_element8(v3, selector - 16);
164 }
165 s390_vec_write_element8(&tmp, i, byte);
166 }
167 *(S390Vector *)v1 = tmp;
168 }
169
170 void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
171 uint64_t bytes)
172 {
173 /* Probe write access before actually modifying memory */
174 probe_write_access(env, addr, bytes, GETPC());
175
176 if (likely(bytes >= 16)) {
177 cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 0), GETPC());
178 addr = wrap_address(env, addr + 8);
179 cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
180 } else {
181 S390Vector tmp = {};
182 int i;
183
184 for (i = 0; i < bytes; i++) {
185 uint8_t byte = s390_vec_read_element8(v1, i);
186
187 cpu_stb_data_ra(env, addr, byte, GETPC());
188 addr = wrap_address(env, addr + 1);
189 }
190 *(S390Vector *)v1 = tmp;
191 }
192 }