target-sparc: Implement BMASK/BSHUFFLE.
[qemu.git] / target-sparc / vis_helper.c
1 /*
2 * VIS op helpers
3 *
4 * Copyright (c) 2003-2005 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "cpu.h"
21 #include "helper.h"
22
/*
 * Extract the inclusive bit range FROM..TO of the 64-bit value X.
 * Bits are numbered in non-native order: bit 0 is the most significant
 * bit and bit 63 the least significant one.
 */
#define GET_FIELD(X, FROM, TO)                                          \
    (((X) >> (63 - (TO))) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Same extraction with the numbering used in the manuals, i.e. bit 0
 * is 2^0.  FROM is the low bit and TO the high bit of the field.
 */
#define GET_FIELD_SP(X, FROM, TO)                                       \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
30
31 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
32 {
33 return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
34 (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
35 (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
36 (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
37 (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
38 (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
39 (((pixel_addr >> 55) & 1) << 4) |
40 (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
41 GET_FIELD_SP(pixel_addr, 11, 12);
42 }
43
44 uint64_t helper_faligndata(CPUState *env, uint64_t src1, uint64_t src2)
45 {
46 uint64_t tmp;
47
48 tmp = src1 << ((env->gsr & 7) * 8);
49 /* on many architectures a shift of 64 does nothing */
50 if ((env->gsr & 7) != 0) {
51 tmp |= src2 >> (64 - (env->gsr & 7) * 8);
52 }
53 return tmp;
54 }
55
/*
 * Lane accessors for the VIS64/VIS32 unions.  Lane 0 always names the
 * least significant element of the value, whatever the host byte order:
 * little-endian hosts index the arrays directly, big-endian hosts index
 * them from the far end.
 */
#ifndef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#else
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#endif
71
72 typedef union {
73 uint8_t b[8];
74 uint16_t w[4];
75 int16_t sw[4];
76 uint32_t l[2];
77 uint64_t ll;
78 float64 d;
79 } VIS64;
80
81 typedef union {
82 uint8_t b[4];
83 uint16_t w[2];
84 uint32_t l;
85 float32 f;
86 } VIS32;
87
88 uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
89 {
90 VIS64 s, d;
91
92 s.ll = src1;
93 d.ll = src2;
94
95 /* Reverse calculation order to handle overlap */
96 d.VIS_B64(7) = s.VIS_B64(3);
97 d.VIS_B64(6) = d.VIS_B64(3);
98 d.VIS_B64(5) = s.VIS_B64(2);
99 d.VIS_B64(4) = d.VIS_B64(2);
100 d.VIS_B64(3) = s.VIS_B64(1);
101 d.VIS_B64(2) = d.VIS_B64(1);
102 d.VIS_B64(1) = s.VIS_B64(0);
103 /* d.VIS_B64(0) = d.VIS_B64(0); */
104
105 return d.ll;
106 }
107
108 uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
109 {
110 VIS64 s, d;
111 uint32_t tmp;
112
113 s.ll = src1;
114 d.ll = src2;
115
116 #define PMUL(r) \
117 tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \
118 if ((tmp & 0xff) > 0x7f) { \
119 tmp += 0x100; \
120 } \
121 d.VIS_W64(r) = tmp >> 8;
122
123 PMUL(0);
124 PMUL(1);
125 PMUL(2);
126 PMUL(3);
127 #undef PMUL
128
129 return d.ll;
130 }
131
/*
 * FMUL8x16AL: multiply each of the four low bytes of src1 (unsigned) by
 * one signed 16-bit scalar taken from bits 31:16 of src2 (the lane the
 * original code addressed as VIS_SW64(1)).  Each product is rounded to
 * nearest on its low 8 bits and bits 23:8 are stored in the matching
 * 16-bit lane of the result.
 *
 * The scalar is latched before any lane is produced.  Previously it was
 * re-read from the destination, which had already been overwritten by
 * the lane 1 result, so lanes 2 and 3 were multiplied by garbage.
 *
 * NOTE(review): which half of the 32-bit operand "AL" should use depends
 * on how the translator places the register in src2 -- confirm.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    const int32_t scalar = (int16_t)(src2 >> 16);
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t prod = scalar * pixel;

        /* Round to nearest before dropping the low 8 bits. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)((prod >> 8) & 0xffff) << (16 * lane);
    }

    return ret;
}
155
/*
 * FMUL8x16AU: multiply each of the four low bytes of src1 (unsigned) by
 * one signed 16-bit scalar taken from bits 15:0 of src2 (the lane the
 * original code addressed as VIS_SW64(0)).  Each product is rounded to
 * nearest on its low 8 bits and bits 23:8 are stored in the matching
 * 16-bit lane of the result.
 *
 * The scalar is latched before any lane is produced.  Previously it was
 * re-read from the destination, which the very first lane overwrote, so
 * lanes 1 to 3 were multiplied by the lane 0 result.
 *
 * NOTE(review): which half of the 32-bit operand "AU" should use depends
 * on how the translator places the register in src2 -- confirm.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    const int32_t scalar = (int16_t)src2;
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t prod = scalar * pixel;

        /* Round to nearest before dropping the low 8 bits. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)((prod >> 8) & 0xffff) << (16 * lane);
    }

    return ret;
}
179
180 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
181 {
182 VIS64 s, d;
183 uint32_t tmp;
184
185 s.ll = src1;
186 d.ll = src2;
187
188 #define PMUL(r) \
189 tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
190 if ((tmp & 0xff) > 0x7f) { \
191 tmp += 0x100; \
192 } \
193 d.VIS_W64(r) = tmp >> 8;
194
195 PMUL(0);
196 PMUL(1);
197 PMUL(2);
198 PMUL(3);
199 #undef PMUL
200
201 return d.ll;
202 }
203
204 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
205 {
206 VIS64 s, d;
207 uint32_t tmp;
208
209 s.ll = src1;
210 d.ll = src2;
211
212 #define PMUL(r) \
213 tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
214 if ((tmp & 0xff) > 0x7f) { \
215 tmp += 0x100; \
216 } \
217 d.VIS_W64(r) = tmp >> 8;
218
219 PMUL(0);
220 PMUL(1);
221 PMUL(2);
222 PMUL(3);
223 #undef PMUL
224
225 return d.ll;
226 }
227
/*
 * FMULD8SUx16: for each of the two low 16-bit lanes, multiply the signed
 * upper 8 bits of the src1 lane by the signed 16-bit src2 lane and shift
 * the 24-bit product left by 8 to form a 32-bit result lane.
 *
 * Unlike the FMUL8*x16 family this produces the full-width product, so
 * no rounding is applied.  The previous code carried over the 8-bit
 * rounding step (which corrupts e.g. small negative products) and
 * omitted the left shift.  Together with FMULD8ULx16 the two partial
 * results now add up to the full 16x16 -> 32 bit product.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t upper8 = (int16_t)(src1 >> (16 * lane)) >> 8;
        int32_t factor = (int16_t)(src2 >> (16 * lane));
        /* Shift as unsigned: the product may be negative. */
        uint32_t prod = (uint32_t)(upper8 * factor) << 8;

        ret |= (uint64_t)prod << (32 * lane);
    }

    return ret;
}
250
/*
 * FMULD8ULx16: for each of the two low 16-bit lanes, multiply the
 * unsigned lower 8 bits of the src1 lane by the signed 16-bit src2 lane
 * and store the product, sign-extended, in a 32-bit result lane.
 *
 * The full product is kept, so no rounding is applied.  The previous
 * code carried over the 8-bit rounding step from the FMUL8*x16 helpers,
 * which added 0x100 to any product whose low byte exceeded 0x7f.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t lower8 = (src1 >> (16 * lane)) & 0xff;
        int32_t factor = (int16_t)(src2 >> (16 * lane));
        uint32_t prod = lower8 * factor;

        ret |= (uint64_t)prod << (32 * lane);
    }

    return ret;
}
273
274 uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
275 {
276 VIS32 s;
277 VIS64 d;
278
279 s.l = (uint32_t)src1;
280 d.ll = src2;
281 d.VIS_W64(0) = s.VIS_B32(0) << 4;
282 d.VIS_W64(1) = s.VIS_B32(1) << 4;
283 d.VIS_W64(2) = s.VIS_B32(2) << 4;
284 d.VIS_W64(3) = s.VIS_B32(3) << 4;
285
286 return d.ll;
287 }
288
289 #define VIS_HELPER(name, F) \
290 uint64_t name##16(uint64_t src1, uint64_t src2) \
291 { \
292 VIS64 s, d; \
293 \
294 s.ll = src1; \
295 d.ll = src2; \
296 \
297 d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \
298 d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \
299 d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \
300 d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \
301 \
302 return d.ll; \
303 } \
304 \
305 uint32_t name##16s(uint32_t src1, uint32_t src2) \
306 { \
307 VIS32 s, d; \
308 \
309 s.l = src1; \
310 d.l = src2; \
311 \
312 d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \
313 d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \
314 \
315 return d.l; \
316 } \
317 \
318 uint64_t name##32(uint64_t src1, uint64_t src2) \
319 { \
320 VIS64 s, d; \
321 \
322 s.ll = src1; \
323 d.ll = src2; \
324 \
325 d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \
326 d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \
327 \
328 return d.ll; \
329 } \
330 \
331 uint32_t name##32s(uint32_t src1, uint32_t src2) \
332 { \
333 VIS32 s, d; \
334 \
335 s.l = src1; \
336 d.l = src2; \
337 \
338 d.l = F(d.l, s.l); \
339 \
340 return d.l; \
341 }
342
343 #define FADD(a, b) ((a) + (b))
344 #define FSUB(a, b) ((a) - (b))
345 VIS_HELPER(helper_fpadd, FADD)
346 VIS_HELPER(helper_fpsub, FSUB)
347
/*
 * Generate the partitioned compare helpers for predicate F:
 *   name##16 compares the four 16-bit lanes of src1 and src2,
 *   name##32 compares the two 32-bit lanes.
 * Bit i of the result is set when F(src1 lane i, src2 lane i) holds,
 * lane 0 being the least significant; all other result bits are zero.
 *
 * The lanes are compared as signed values, as the VIS fixed-point
 * compares require.  They used to be compared through the unsigned
 * views, which gave wrong answers for GT/LE whenever exactly one operand
 * had its sign bit set (EQ/NE are unaffected).
 */
#define VIS_CMPHELPER(name, F)                                          \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t bits = 0;                                              \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            int16_t lhs = (int16_t)(src1 >> (16 * lane));               \
            int16_t rhs = (int16_t)(src2 >> (16 * lane));               \
                                                                        \
            if (F(lhs, rhs)) {                                          \
                bits |= 1ULL << lane;                                   \
            }                                                           \
        }                                                               \
                                                                        \
        return bits;                                                    \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t bits = 0;                                              \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            int32_t lhs = (int32_t)(src1 >> (32 * lane));               \
            int32_t rhs = (int32_t)(src2 >> (32 * lane));               \
                                                                        \
            if (F(lhs, rhs)) {                                          \
                bits |= 1ULL << lane;                                   \
            }                                                           \
        }                                                               \
                                                                        \
        return bits;                                                    \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
388
/*
 * PDIST: add to sum the absolute differences between the eight
 * corresponding unsigned bytes of src1 and src2.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    /* Walk the bytes from most to least significant. */
    for (shift = 56; shift >= 0; shift -= 8) {
        int a = (src1 >> shift) & 0xff;
        int b = (src2 >> shift) & 0xff;

        sum += (a < b) ? (b - a) : (a - b);
    }

    return sum;
}
409
/*
 * FPACK16: convert the four signed 16-bit lanes of rs2 to unsigned
 * bytes.  Each lane is shifted left by the 4-bit scale factor found in
 * gsr bits 6:3, shifted right by 7, and clamped to 0..255.  Lane i ends
 * up in byte i of the result.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0xf;
    uint32_t packed = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int16_t fixed = rs2 >> (lane * 16);
        int32_t pixel = fixed << scale;

        pixel >>= 7;
        if (pixel < 0) {
            pixel = 0;
        } else if (pixel > 255) {
            pixel = 255;
        }

        packed |= (uint32_t)pixel << (8 * lane);
    }

    return packed;
}
430
/*
 * FPACK32: shift rs1 left by one byte and insert, as the new low byte of
 * each 32-bit half, the corresponding signed 32-bit word of rs2 scaled
 * to an unsigned byte.  Each word is shifted left by the 5-bit scale
 * factor in gsr bits 7:3, shifted right by 23, and clamped to 0..255.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    /* Make room for the new low byte of each half. */
    uint64_t packed = (rs1 << 8) & ~(0x000000ff000000ffULL);
    int half;

    for (half = 0; half < 2; half++) {
        int32_t fixed = rs2 >> (half * 32);
        int64_t pixel = (int64_t)fixed << scale;

        pixel >>= 23;
        if (pixel < 0) {
            pixel = 0;
        } else if (pixel > 255) {
            pixel = 255;
        }

        packed |= (uint64_t)pixel << (32 * half);
    }

    return packed;
}
452
/*
 * FPACKFIX: convert the two signed 32-bit words of rs2 to signed 16-bit
 * values.  Each word is scaled by 2^scale (the 5-bit scale factor in gsr
 * bits 7:3), shifted right by 16, and saturated to -32768..32767.  Word
 * i ends up in 16-bit lane i of the result.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        /*
         * Scale in 64 bits, as helper_fpack32 does.  Shifting the 32-bit
         * value directly overflowed before the widening and lost exactly
         * the high bits the saturation below depends on.  Multiplying by
         * the power of two keeps negative inputs well defined.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
473
/*
 * BSHUFFLE: build a 64-bit value from eight bytes picked out of the
 * 16-byte concatenation src1:src2.
 *
 * The selectors are the eight 4-bit fields of the mask held in gsr bits
 * 63:32.  Both the source bytes and the result bytes are numbered from
 * the most significant end: source byte 0 is the top byte of src1,
 * source byte 15 the bottom byte of src2, and the selector in mask bits
 * 31:28 chooses the most significant result byte.  A mask of 0x01234567
 * therefore returns src1 unchanged and 0x89abcdef returns src2.
 *
 * Two defects of the previous version are fixed here: the result bytes
 * were filled starting from the least significant one (so the identity
 * mask byte-swapped src1), and the return type was spelled with the
 * non-standard "uint64" instead of uint64_t like every other helper.
 * Working on shifted values also removes the host byte-order cases.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    uint32_t mask = gsr >> 32;
    uint64_t result = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        unsigned sel = (mask >> (28 - i * 4)) & 0xf;
        /* Selectors 0..7 address src1, 8..15 address src2. */
        uint64_t half = (sel < 8) ? src1 : src2;
        uint64_t byte = (half >> (56 - (sel & 7) * 8)) & 0xff;

        result |= byte << (56 - i * 8);
    }

    return result;
}