target-ppc: Add missing 'static' and 'const' attributes
[qemu.git] / target-ppc / int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "helper.h"
22
23 #include "helper_regs.h"
24 /*****************************************************************************/
25 /* Fixed point operations helpers */
26 #if defined(TARGET_PPC64)
27
28 uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
29 {
30 int64_t th;
31 uint64_t tl;
32
33 muls64(&tl, (uint64_t *)&th, arg1, arg2);
34 /* If th != 0 && th != -1, then we had an overflow */
35 if (likely((uint64_t)(th + 1) <= 1)) {
36 env->ov = 0;
37 } else {
38 env->so = env->ov = 1;
39 }
40 return (int64_t)tl;
41 }
42 #endif
43
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
46 {
47 uint64_t rt = 0;
48 int overflow = 0;
49
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
52
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
58 }
59
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
62 }
63
64 if (oe) {
65 if (unlikely(overflow)) {
66 env->so = env->ov = 1;
67 } else {
68 env->ov = 0;
69 }
70 }
71
72 return (target_ulong)rt;
73 }
74
75 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
76 uint32_t oe)
77 {
78 int64_t rt = 0;
79 int overflow = 0;
80
81 int64_t dividend = (int64_t)ra << 32;
82 int64_t divisor = (int64_t)((int32_t)rb);
83
84 if (unlikely((divisor == 0) ||
85 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
86 overflow = 1;
87 } else {
88 rt = dividend / divisor;
89 overflow = rt != (int32_t)rt;
90 }
91
92 if (unlikely(overflow)) {
93 rt = 0; /* Undefined */
94 }
95
96 if (oe) {
97 if (unlikely(overflow)) {
98 env->so = env->ov = 1;
99 } else {
100 env->ov = 0;
101 }
102 }
103
104 return (target_ulong)rt;
105 }
106
107 #if defined(TARGET_PPC64)
108
109 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
110 {
111 uint64_t rt = 0;
112 int overflow = 0;
113
114 overflow = divu128(&rt, &ra, rb);
115
116 if (unlikely(overflow)) {
117 rt = 0; /* Undefined */
118 }
119
120 if (oe) {
121 if (unlikely(overflow)) {
122 env->so = env->ov = 1;
123 } else {
124 env->ov = 0;
125 }
126 }
127
128 return rt;
129 }
130
131 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
132 {
133 int64_t rt = 0;
134 int64_t ra = (int64_t)rau;
135 int64_t rb = (int64_t)rbu;
136 int overflow = divs128(&rt, &ra, rb);
137
138 if (unlikely(overflow)) {
139 rt = 0; /* Undefined */
140 }
141
142 if (oe) {
143
144 if (unlikely(overflow)) {
145 env->so = env->ov = 1;
146 } else {
147 env->ov = 0;
148 }
149 }
150
151 return rt;
152 }
153
154 #endif
155
156
157 target_ulong helper_cntlzw(target_ulong t)
158 {
159 return clz32(t);
160 }
161
162 #if defined(TARGET_PPC64)
163 target_ulong helper_cntlzd(target_ulong t)
164 {
165 return clz64(t);
166 }
167 #endif
168
169 #if defined(TARGET_PPC64)
170
/*
 * bpermd: bit permute doubleword.
 *
 * Each of the eight bytes of rs is a bit selector.  For byte number k
 * (k = 0 is the least significant byte), a selector below 64 picks bit
 * (63 - selector) of rb, counting from the least significant bit; if that
 * bit is set, bit k of the result is set.  Selectors of 64 or more
 * contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t permuted = 0;
    int byte;

    for (byte = 0; byte < 8; byte++) {
        int selector = (rs >> (byte * 8)) & 0xFF;

        if (selector >= 64) {
            continue;
        }
        if ((rb >> (63 - selector)) & 1) {
            permuted |= 1 << byte;
        }
    }
    return permuted;
}
186
187 #endif
188
189 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
190 {
191 target_ulong mask = 0xff;
192 target_ulong ra = 0;
193 int i;
194
195 for (i = 0; i < sizeof(target_ulong); i++) {
196 if ((rs & mask) == (rb & mask)) {
197 ra |= mask;
198 }
199 mask <<= 8;
200 }
201 return ra;
202 }
203
204 /* shift right arithmetic helper */
205 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
206 target_ulong shift)
207 {
208 int32_t ret;
209
210 if (likely(!(shift & 0x20))) {
211 if (likely((uint32_t)shift != 0)) {
212 shift &= 0x1f;
213 ret = (int32_t)value >> shift;
214 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
215 env->ca = 0;
216 } else {
217 env->ca = 1;
218 }
219 } else {
220 ret = (int32_t)value;
221 env->ca = 0;
222 }
223 } else {
224 ret = (int32_t)value >> 31;
225 env->ca = (ret != 0);
226 }
227 return (target_long)ret;
228 }
229
230 #if defined(TARGET_PPC64)
231 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
233 {
234 int64_t ret;
235
236 if (likely(!(shift & 0x40))) {
237 if (likely((uint64_t)shift != 0)) {
238 shift &= 0x3f;
239 ret = (int64_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca = 0;
242 } else {
243 env->ca = 1;
244 }
245 } else {
246 ret = (int64_t)value;
247 env->ca = 0;
248 }
249 } else {
250 ret = (int64_t)value >> 63;
251 env->ca = (ret != 0);
252 }
253 return ret;
254 }
255 #endif
256
257 #if defined(TARGET_PPC64)
258 target_ulong helper_popcntb(target_ulong val)
259 {
260 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
261 0x5555555555555555ULL);
262 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
263 0x3333333333333333ULL);
264 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
265 0x0f0f0f0f0f0f0f0fULL);
266 return val;
267 }
268
269 target_ulong helper_popcntw(target_ulong val)
270 {
271 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
272 0x5555555555555555ULL);
273 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
274 0x3333333333333333ULL);
275 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
276 0x0f0f0f0f0f0f0f0fULL);
277 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
278 0x00ff00ff00ff00ffULL);
279 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
280 0x0000ffff0000ffffULL);
281 return val;
282 }
283
284 target_ulong helper_popcntd(target_ulong val)
285 {
286 return ctpop64(val);
287 }
288 #else
289 target_ulong helper_popcntb(target_ulong val)
290 {
291 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
292 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
293 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
294 return val;
295 }
296
297 target_ulong helper_popcntw(target_ulong val)
298 {
299 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
300 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
301 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
302 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
303 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
304 return val;
305 }
306 #endif
307
308 /*****************************************************************************/
309 /* PowerPC 601 specific instructions (POWER bridge) */
310 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
311 {
312 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
313
314 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
315 (int32_t)arg2 == 0) {
316 env->spr[SPR_MQ] = 0;
317 return INT32_MIN;
318 } else {
319 env->spr[SPR_MQ] = tmp % arg2;
320 return tmp / (int32_t)arg2;
321 }
322 }
323
324 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
325 target_ulong arg2)
326 {
327 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328
329 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
330 (int32_t)arg2 == 0) {
331 env->so = env->ov = 1;
332 env->spr[SPR_MQ] = 0;
333 return INT32_MIN;
334 } else {
335 env->spr[SPR_MQ] = tmp % arg2;
336 tmp /= (int32_t)arg2;
337 if ((int32_t)tmp != tmp) {
338 env->so = env->ov = 1;
339 } else {
340 env->ov = 0;
341 }
342 return tmp;
343 }
344 }
345
346 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
347 target_ulong arg2)
348 {
349 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
350 (int32_t)arg2 == 0) {
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
355 return (int32_t)arg1 / (int32_t)arg2;
356 }
357 }
358
359 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
360 target_ulong arg2)
361 {
362 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
363 (int32_t)arg2 == 0) {
364 env->so = env->ov = 1;
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->ov = 0;
369 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
370 return (int32_t)arg1 / (int32_t)arg2;
371 }
372 }
373
374 /*****************************************************************************/
375 /* 602 specific instructions */
376 /* mfrom is the most crazy instruction ever seen, imho ! */
377 /* Real implementation uses a ROM table. Do the same */
378 /* Extremely decomposed:
379 * -arg / 256
380 * return 256 * log10(10 + 1.0) + 0.5
381 */
382 #if !defined(CONFIG_USER_ONLY)
383 target_ulong helper_602_mfrom(target_ulong arg)
384 {
385 if (likely(arg < 602)) {
386 #include "mfrom_table.c"
387 return mfrom_ROM_table[arg];
388 } else {
389 return 0;
390 }
391 }
392 #endif
393
394 /*****************************************************************************/
395 /* Altivec extension helpers */
/*
 * Host-endianness helpers for vector register accesses.
 *
 * HI_IDX / LO_IDX are the two indices of a pair, swapped according to
 * host endianness.
 *
 * VECTOR_FOR_INORDER_I(index, element) loops over every entry of
 * r->element: ascending on big-endian hosts, descending on little-endian
 * hosts.  It relies on a variable named `r` being in scope, and `index`
 * must be signed because the descending form terminates on index < 0.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define HI_IDX 1
#define LO_IDX 0
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
411
412 /* Saturating arithmetic helpers. */
/*
 * SATCVT(from, to, from_type, to_type, min, max) defines
 *     to_type cvt<from><to>(from_type x, int *sat)
 * which clamps x to [min, max].  *sat is set to 1 when clamping occurs
 * and is left untouched otherwise.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
/*
 * SATCVTU is the unsigned-source variant: only the upper bound is
 * checked (the min argument is accepted but not used).
 */
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
454
455 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
456 {
457 int i, j = (sh & 0xf);
458
459 VECTOR_FOR_INORDER_I(i, u8) {
460 r->u8[i] = j++;
461 }
462 }
463
464 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
465 {
466 int i, j = 0x10 - (sh & 0xf);
467
468 VECTOR_FOR_INORDER_I(i, u8) {
469 r->u8[i] = j++;
470 }
471 }
472
473 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
474 {
475 #if defined(HOST_WORDS_BIGENDIAN)
476 env->vscr = r->u32[3];
477 #else
478 env->vscr = r->u32[0];
479 #endif
480 set_flush_to_zero(vscr_nj, &env->vec_status);
481 }
482
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
484 {
485 int i;
486
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
489 }
490 }
491
492 #define VARITH_DO(name, op, element) \
493 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
494 { \
495 int i; \
496 \
497 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
498 r->element[i] = a->element[i] op b->element[i]; \
499 } \
500 }
501 #define VARITH(suffix, element) \
502 VARITH_DO(add##suffix, +, element) \
503 VARITH_DO(sub##suffix, -, element)
504 VARITH(ubm, u8)
505 VARITH(uhm, u16)
506 VARITH(uwm, u32)
507 VARITH(udm, u64)
508 VARITH_DO(muluwm, *, u32)
509 #undef VARITH_DO
510 #undef VARITH
511
512 #define VARITHFP(suffix, func) \
513 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
514 ppc_avr_t *b) \
515 { \
516 int i; \
517 \
518 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
519 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
520 } \
521 }
522 VARITHFP(addfp, float32_add)
523 VARITHFP(subfp, float32_sub)
524 VARITHFP(minfp, float32_min)
525 VARITHFP(maxfp, float32_max)
526 #undef VARITHFP
527
528 #define VARITHFPFMA(suffix, type) \
529 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
530 ppc_avr_t *b, ppc_avr_t *c) \
531 { \
532 int i; \
533 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
534 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
535 type, &env->vec_status); \
536 } \
537 }
538 VARITHFPFMA(maddfp, 0);
539 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
540 #undef VARITHFPFMA
541
542 #define VARITHSAT_CASE(type, op, cvt, element) \
543 { \
544 type result = (type)a->element[i] op (type)b->element[i]; \
545 r->element[i] = cvt(result, &sat); \
546 }
547
548 #define VARITHSAT_DO(name, op, optype, cvt, element) \
549 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
550 ppc_avr_t *b) \
551 { \
552 int sat = 0; \
553 int i; \
554 \
555 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
556 switch (sizeof(r->element[0])) { \
557 case 1: \
558 VARITHSAT_CASE(optype, op, cvt, element); \
559 break; \
560 case 2: \
561 VARITHSAT_CASE(optype, op, cvt, element); \
562 break; \
563 case 4: \
564 VARITHSAT_CASE(optype, op, cvt, element); \
565 break; \
566 } \
567 } \
568 if (sat) { \
569 env->vscr |= (1 << VSCR_SAT); \
570 } \
571 }
572 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
573 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
574 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
575 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
576 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
577 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
578 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
579 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
580 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
581 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
582 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
583 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
584 #undef VARITHSAT_CASE
585 #undef VARITHSAT_DO
586 #undef VARITHSAT_SIGNED
587 #undef VARITHSAT_UNSIGNED
588
589 #define VAVG_DO(name, element, etype) \
590 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
591 { \
592 int i; \
593 \
594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
595 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
596 r->element[i] = x >> 1; \
597 } \
598 }
599
600 #define VAVG(type, signed_element, signed_type, unsigned_element, \
601 unsigned_type) \
602 VAVG_DO(avgs##type, signed_element, signed_type) \
603 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
604 VAVG(b, s8, int16_t, u8, uint16_t)
605 VAVG(h, s16, int32_t, u16, uint32_t)
606 VAVG(w, s32, int64_t, u32, uint64_t)
607 #undef VAVG_DO
608 #undef VAVG
609
610 #define VCF(suffix, cvt, element) \
611 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
612 ppc_avr_t *b, uint32_t uim) \
613 { \
614 int i; \
615 \
616 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
617 float32 t = cvt(b->element[i], &env->vec_status); \
618 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
619 } \
620 }
621 VCF(ux, uint32_to_float32, u32)
622 VCF(sx, int32_to_float32, s32)
623 #undef VCF
624
625 #define VCMP_DO(suffix, compare, element, record) \
626 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
627 ppc_avr_t *a, ppc_avr_t *b) \
628 { \
629 uint64_t ones = (uint64_t)-1; \
630 uint64_t all = ones; \
631 uint64_t none = 0; \
632 int i; \
633 \
634 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
635 uint64_t result = (a->element[i] compare b->element[i] ? \
636 ones : 0x0); \
637 switch (sizeof(a->element[0])) { \
638 case 8: \
639 r->u64[i] = result; \
640 break; \
641 case 4: \
642 r->u32[i] = result; \
643 break; \
644 case 2: \
645 r->u16[i] = result; \
646 break; \
647 case 1: \
648 r->u8[i] = result; \
649 break; \
650 } \
651 all &= result; \
652 none |= result; \
653 } \
654 if (record) { \
655 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
656 } \
657 }
658 #define VCMP(suffix, compare, element) \
659 VCMP_DO(suffix, compare, element, 0) \
660 VCMP_DO(suffix##_dot, compare, element, 1)
661 VCMP(equb, ==, u8)
662 VCMP(equh, ==, u16)
663 VCMP(equw, ==, u32)
664 VCMP(equd, ==, u64)
665 VCMP(gtub, >, u8)
666 VCMP(gtuh, >, u16)
667 VCMP(gtuw, >, u32)
668 VCMP(gtud, >, u64)
669 VCMP(gtsb, >, s8)
670 VCMP(gtsh, >, s16)
671 VCMP(gtsw, >, s32)
672 VCMP(gtsd, >, s64)
673 #undef VCMP_DO
674 #undef VCMP
675
676 #define VCMPFP_DO(suffix, compare, order, record) \
677 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
678 ppc_avr_t *a, ppc_avr_t *b) \
679 { \
680 uint32_t ones = (uint32_t)-1; \
681 uint32_t all = ones; \
682 uint32_t none = 0; \
683 int i; \
684 \
685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
686 uint32_t result; \
687 int rel = float32_compare_quiet(a->f[i], b->f[i], \
688 &env->vec_status); \
689 if (rel == float_relation_unordered) { \
690 result = 0; \
691 } else if (rel compare order) { \
692 result = ones; \
693 } else { \
694 result = 0; \
695 } \
696 r->u32[i] = result; \
697 all &= result; \
698 none |= result; \
699 } \
700 if (record) { \
701 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
702 } \
703 }
704 #define VCMPFP(suffix, compare, order) \
705 VCMPFP_DO(suffix, compare, order, 0) \
706 VCMPFP_DO(suffix##_dot, compare, order, 1)
707 VCMPFP(eqfp, ==, float_relation_equal)
708 VCMPFP(gefp, !=, float_relation_less)
709 VCMPFP(gtfp, ==, float_relation_greater)
710 #undef VCMPFP_DO
711 #undef VCMPFP
712
713 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
714 ppc_avr_t *a, ppc_avr_t *b, int record)
715 {
716 int i;
717 int all_in = 0;
718
719 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
720 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
721 if (le_rel == float_relation_unordered) {
722 r->u32[i] = 0xc0000000;
723 /* ALL_IN does not need to be updated here. */
724 } else {
725 float32 bneg = float32_chs(b->f[i]);
726 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
727 int le = le_rel != float_relation_greater;
728 int ge = ge_rel != float_relation_less;
729
730 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
731 all_in |= (!le | !ge);
732 }
733 }
734 if (record) {
735 env->crf[6] = (all_in == 0) << 1;
736 }
737 }
738
739 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
740 {
741 vcmpbfp_internal(env, r, a, b, 0);
742 }
743
744 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
745 ppc_avr_t *b)
746 {
747 vcmpbfp_internal(env, r, a, b, 1);
748 }
749
750 #define VCT(suffix, satcvt, element) \
751 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
752 ppc_avr_t *b, uint32_t uim) \
753 { \
754 int i; \
755 int sat = 0; \
756 float_status s = env->vec_status; \
757 \
758 set_float_rounding_mode(float_round_to_zero, &s); \
759 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
760 if (float32_is_any_nan(b->f[i])) { \
761 r->element[i] = 0; \
762 } else { \
763 float64 t = float32_to_float64(b->f[i], &s); \
764 int64_t j; \
765 \
766 t = float64_scalbn(t, uim, &s); \
767 j = float64_to_int64(t, &s); \
768 r->element[i] = satcvt(j, &sat); \
769 } \
770 } \
771 if (sat) { \
772 env->vscr |= (1 << VSCR_SAT); \
773 } \
774 }
775 VCT(uxs, cvtsduw, u32)
776 VCT(sxs, cvtsdsw, s32)
777 #undef VCT
778
779 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
780 ppc_avr_t *b, ppc_avr_t *c)
781 {
782 int sat = 0;
783 int i;
784
785 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
786 int32_t prod = a->s16[i] * b->s16[i];
787 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
788
789 r->s16[i] = cvtswsh(t, &sat);
790 }
791
792 if (sat) {
793 env->vscr |= (1 << VSCR_SAT);
794 }
795 }
796
797 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
798 ppc_avr_t *b, ppc_avr_t *c)
799 {
800 int sat = 0;
801 int i;
802
803 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
804 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
805 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
806 r->s16[i] = cvtswsh(t, &sat);
807 }
808
809 if (sat) {
810 env->vscr |= (1 << VSCR_SAT);
811 }
812 }
813
814 #define VMINMAX_DO(name, compare, element) \
815 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
816 { \
817 int i; \
818 \
819 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
820 if (a->element[i] compare b->element[i]) { \
821 r->element[i] = b->element[i]; \
822 } else { \
823 r->element[i] = a->element[i]; \
824 } \
825 } \
826 }
827 #define VMINMAX(suffix, element) \
828 VMINMAX_DO(min##suffix, >, element) \
829 VMINMAX_DO(max##suffix, <, element)
830 VMINMAX(sb, s8)
831 VMINMAX(sh, s16)
832 VMINMAX(sw, s32)
833 VMINMAX(sd, s64)
834 VMINMAX(ub, u8)
835 VMINMAX(uh, u16)
836 VMINMAX(uw, u32)
837 VMINMAX(ud, u64)
838 #undef VMINMAX_DO
839 #undef VMINMAX
840
841 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
842 {
843 int i;
844
845 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
846 int32_t prod = a->s16[i] * b->s16[i];
847 r->s16[i] = (int16_t) (prod + c->s16[i]);
848 }
849 }
850
851 #define VMRG_DO(name, element, highp) \
852 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
853 { \
854 ppc_avr_t result; \
855 int i; \
856 size_t n_elems = ARRAY_SIZE(r->element); \
857 \
858 for (i = 0; i < n_elems / 2; i++) { \
859 if (highp) { \
860 result.element[i*2+HI_IDX] = a->element[i]; \
861 result.element[i*2+LO_IDX] = b->element[i]; \
862 } else { \
863 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
864 b->element[n_elems - i - 1]; \
865 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
866 a->element[n_elems - i - 1]; \
867 } \
868 } \
869 *r = result; \
870 }
871 #if defined(HOST_WORDS_BIGENDIAN)
872 #define MRGHI 0
873 #define MRGLO 1
874 #else
875 #define MRGHI 1
876 #define MRGLO 0
877 #endif
878 #define VMRG(suffix, element) \
879 VMRG_DO(mrgl##suffix, element, MRGHI) \
880 VMRG_DO(mrgh##suffix, element, MRGLO)
881 VMRG(b, u8)
882 VMRG(h, u16)
883 VMRG(w, u32)
884 #undef VMRG_DO
885 #undef VMRG
886 #undef MRGHI
887 #undef MRGLO
888
889 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
890 ppc_avr_t *b, ppc_avr_t *c)
891 {
892 int32_t prod[16];
893 int i;
894
895 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
896 prod[i] = (int32_t)a->s8[i] * b->u8[i];
897 }
898
899 VECTOR_FOR_INORDER_I(i, s32) {
900 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
901 prod[4 * i + 2] + prod[4 * i + 3];
902 }
903 }
904
905 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
906 ppc_avr_t *b, ppc_avr_t *c)
907 {
908 int32_t prod[8];
909 int i;
910
911 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
912 prod[i] = a->s16[i] * b->s16[i];
913 }
914
915 VECTOR_FOR_INORDER_I(i, s32) {
916 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
917 }
918 }
919
920 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
921 ppc_avr_t *b, ppc_avr_t *c)
922 {
923 int32_t prod[8];
924 int i;
925 int sat = 0;
926
927 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
928 prod[i] = (int32_t)a->s16[i] * b->s16[i];
929 }
930
931 VECTOR_FOR_INORDER_I(i, s32) {
932 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
933
934 r->u32[i] = cvtsdsw(t, &sat);
935 }
936
937 if (sat) {
938 env->vscr |= (1 << VSCR_SAT);
939 }
940 }
941
942 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
943 ppc_avr_t *b, ppc_avr_t *c)
944 {
945 uint16_t prod[16];
946 int i;
947
948 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
949 prod[i] = a->u8[i] * b->u8[i];
950 }
951
952 VECTOR_FOR_INORDER_I(i, u32) {
953 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
954 prod[4 * i + 2] + prod[4 * i + 3];
955 }
956 }
957
958 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
959 ppc_avr_t *b, ppc_avr_t *c)
960 {
961 uint32_t prod[8];
962 int i;
963
964 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
965 prod[i] = a->u16[i] * b->u16[i];
966 }
967
968 VECTOR_FOR_INORDER_I(i, u32) {
969 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
970 }
971 }
972
973 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
974 ppc_avr_t *b, ppc_avr_t *c)
975 {
976 uint32_t prod[8];
977 int i;
978 int sat = 0;
979
980 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
981 prod[i] = a->u16[i] * b->u16[i];
982 }
983
984 VECTOR_FOR_INORDER_I(i, s32) {
985 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
986
987 r->u32[i] = cvtuduw(t, &sat);
988 }
989
990 if (sat) {
991 env->vscr |= (1 << VSCR_SAT);
992 }
993 }
994
995 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
996 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 { \
998 int i; \
999 \
1000 VECTOR_FOR_INORDER_I(i, prod_element) { \
1001 if (evenp) { \
1002 r->prod_element[i] = \
1003 (cast)a->mul_element[i * 2 + HI_IDX] * \
1004 (cast)b->mul_element[i * 2 + HI_IDX]; \
1005 } else { \
1006 r->prod_element[i] = \
1007 (cast)a->mul_element[i * 2 + LO_IDX] * \
1008 (cast)b->mul_element[i * 2 + LO_IDX]; \
1009 } \
1010 } \
1011 }
1012 #define VMUL(suffix, mul_element, prod_element, cast) \
1013 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1014 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1015 VMUL(sb, s8, s16, int16_t)
1016 VMUL(sh, s16, s32, int32_t)
1017 VMUL(sw, s32, s64, int64_t)
1018 VMUL(ub, u8, u16, uint16_t)
1019 VMUL(uh, u16, u32, uint32_t)
1020 VMUL(uw, u32, u64, uint64_t)
1021 #undef VMUL_DO
1022 #undef VMUL
1023
1024 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1025 ppc_avr_t *c)
1026 {
1027 ppc_avr_t result;
1028 int i;
1029
1030 VECTOR_FOR_INORDER_I(i, u8) {
1031 int s = c->u8[i] & 0x1f;
1032 #if defined(HOST_WORDS_BIGENDIAN)
1033 int index = s & 0xf;
1034 #else
1035 int index = 15 - (s & 0xf);
1036 #endif
1037
1038 if (s & 0x10) {
1039 result.u8[i] = b->u8[index];
1040 } else {
1041 result.u8[i] = a->u8[index];
1042 }
1043 }
1044 *r = result;
1045 }
1046
1047 #if defined(HOST_WORDS_BIGENDIAN)
1048 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1049 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1050 #else
1051 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1052 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1053 #endif
1054
1055 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1056 {
1057 int i;
1058 uint64_t perm = 0;
1059
1060 VECTOR_FOR_INORDER_I(i, u8) {
1061 int index = VBPERMQ_INDEX(b, i);
1062
1063 if (index < 128) {
1064 uint64_t mask = (1ull << (63-(index & 0x3F)));
1065 if (a->u64[VBPERMQ_DW(index)] & mask) {
1066 perm |= (0x8000 >> i);
1067 }
1068 }
1069 }
1070
1071 r->u64[HI_IDX] = perm;
1072 r->u64[LO_IDX] = 0;
1073 }
1074
1075 #undef VBPERMQ_INDEX
1076 #undef VBPERMQ_DW
1077
1078 static const uint64_t VGBBD_MASKS[256] = {
1079 0x0000000000000000ull, /* 00 */
1080 0x0000000000000080ull, /* 01 */
1081 0x0000000000008000ull, /* 02 */
1082 0x0000000000008080ull, /* 03 */
1083 0x0000000000800000ull, /* 04 */
1084 0x0000000000800080ull, /* 05 */
1085 0x0000000000808000ull, /* 06 */
1086 0x0000000000808080ull, /* 07 */
1087 0x0000000080000000ull, /* 08 */
1088 0x0000000080000080ull, /* 09 */
1089 0x0000000080008000ull, /* 0A */
1090 0x0000000080008080ull, /* 0B */
1091 0x0000000080800000ull, /* 0C */
1092 0x0000000080800080ull, /* 0D */
1093 0x0000000080808000ull, /* 0E */
1094 0x0000000080808080ull, /* 0F */
1095 0x0000008000000000ull, /* 10 */
1096 0x0000008000000080ull, /* 11 */
1097 0x0000008000008000ull, /* 12 */
1098 0x0000008000008080ull, /* 13 */
1099 0x0000008000800000ull, /* 14 */
1100 0x0000008000800080ull, /* 15 */
1101 0x0000008000808000ull, /* 16 */
1102 0x0000008000808080ull, /* 17 */
1103 0x0000008080000000ull, /* 18 */
1104 0x0000008080000080ull, /* 19 */
1105 0x0000008080008000ull, /* 1A */
1106 0x0000008080008080ull, /* 1B */
1107 0x0000008080800000ull, /* 1C */
1108 0x0000008080800080ull, /* 1D */
1109 0x0000008080808000ull, /* 1E */
1110 0x0000008080808080ull, /* 1F */
1111 0x0000800000000000ull, /* 20 */
1112 0x0000800000000080ull, /* 21 */
1113 0x0000800000008000ull, /* 22 */
1114 0x0000800000008080ull, /* 23 */
1115 0x0000800000800000ull, /* 24 */
1116 0x0000800000800080ull, /* 25 */
1117 0x0000800000808000ull, /* 26 */
1118 0x0000800000808080ull, /* 27 */
1119 0x0000800080000000ull, /* 28 */
1120 0x0000800080000080ull, /* 29 */
1121 0x0000800080008000ull, /* 2A */
1122 0x0000800080008080ull, /* 2B */
1123 0x0000800080800000ull, /* 2C */
1124 0x0000800080800080ull, /* 2D */
1125 0x0000800080808000ull, /* 2E */
1126 0x0000800080808080ull, /* 2F */
1127 0x0000808000000000ull, /* 30 */
1128 0x0000808000000080ull, /* 31 */
1129 0x0000808000008000ull, /* 32 */
1130 0x0000808000008080ull, /* 33 */
1131 0x0000808000800000ull, /* 34 */
1132 0x0000808000800080ull, /* 35 */
1133 0x0000808000808000ull, /* 36 */
1134 0x0000808000808080ull, /* 37 */
1135 0x0000808080000000ull, /* 38 */
1136 0x0000808080000080ull, /* 39 */
1137 0x0000808080008000ull, /* 3A */
1138 0x0000808080008080ull, /* 3B */
1139 0x0000808080800000ull, /* 3C */
1140 0x0000808080800080ull, /* 3D */
1141 0x0000808080808000ull, /* 3E */
1142 0x0000808080808080ull, /* 3F */
1143 0x0080000000000000ull, /* 40 */
1144 0x0080000000000080ull, /* 41 */
1145 0x0080000000008000ull, /* 42 */
1146 0x0080000000008080ull, /* 43 */
1147 0x0080000000800000ull, /* 44 */
1148 0x0080000000800080ull, /* 45 */
1149 0x0080000000808000ull, /* 46 */
1150 0x0080000000808080ull, /* 47 */
1151 0x0080000080000000ull, /* 48 */
1152 0x0080000080000080ull, /* 49 */
1153 0x0080000080008000ull, /* 4A */
1154 0x0080000080008080ull, /* 4B */
1155 0x0080000080800000ull, /* 4C */
1156 0x0080000080800080ull, /* 4D */
1157 0x0080000080808000ull, /* 4E */
1158 0x0080000080808080ull, /* 4F */
1159 0x0080008000000000ull, /* 50 */
1160 0x0080008000000080ull, /* 51 */
1161 0x0080008000008000ull, /* 52 */
1162 0x0080008000008080ull, /* 53 */
1163 0x0080008000800000ull, /* 54 */
1164 0x0080008000800080ull, /* 55 */
1165 0x0080008000808000ull, /* 56 */
1166 0x0080008000808080ull, /* 57 */
1167 0x0080008080000000ull, /* 58 */
1168 0x0080008080000080ull, /* 59 */
1169 0x0080008080008000ull, /* 5A */
1170 0x0080008080008080ull, /* 5B */
1171 0x0080008080800000ull, /* 5C */
1172 0x0080008080800080ull, /* 5D */
1173 0x0080008080808000ull, /* 5E */
1174 0x0080008080808080ull, /* 5F */
1175 0x0080800000000000ull, /* 60 */
1176 0x0080800000000080ull, /* 61 */
1177 0x0080800000008000ull, /* 62 */
1178 0x0080800000008080ull, /* 63 */
1179 0x0080800000800000ull, /* 64 */
1180 0x0080800000800080ull, /* 65 */
1181 0x0080800000808000ull, /* 66 */
1182 0x0080800000808080ull, /* 67 */
1183 0x0080800080000000ull, /* 68 */
1184 0x0080800080000080ull, /* 69 */
1185 0x0080800080008000ull, /* 6A */
1186 0x0080800080008080ull, /* 6B */
1187 0x0080800080800000ull, /* 6C */
1188 0x0080800080800080ull, /* 6D */
1189 0x0080800080808000ull, /* 6E */
1190 0x0080800080808080ull, /* 6F */
1191 0x0080808000000000ull, /* 70 */
1192 0x0080808000000080ull, /* 71 */
1193 0x0080808000008000ull, /* 72 */
1194 0x0080808000008080ull, /* 73 */
1195 0x0080808000800000ull, /* 74 */
1196 0x0080808000800080ull, /* 75 */
1197 0x0080808000808000ull, /* 76 */
1198 0x0080808000808080ull, /* 77 */
1199 0x0080808080000000ull, /* 78 */
1200 0x0080808080000080ull, /* 79 */
1201 0x0080808080008000ull, /* 7A */
1202 0x0080808080008080ull, /* 7B */
1203 0x0080808080800000ull, /* 7C */
1204 0x0080808080800080ull, /* 7D */
1205 0x0080808080808000ull, /* 7E */
1206 0x0080808080808080ull, /* 7F */
1207 0x8000000000000000ull, /* 80 */
1208 0x8000000000000080ull, /* 81 */
1209 0x8000000000008000ull, /* 82 */
1210 0x8000000000008080ull, /* 83 */
1211 0x8000000000800000ull, /* 84 */
1212 0x8000000000800080ull, /* 85 */
1213 0x8000000000808000ull, /* 86 */
1214 0x8000000000808080ull, /* 87 */
1215 0x8000000080000000ull, /* 88 */
1216 0x8000000080000080ull, /* 89 */
1217 0x8000000080008000ull, /* 8A */
1218 0x8000000080008080ull, /* 8B */
1219 0x8000000080800000ull, /* 8C */
1220 0x8000000080800080ull, /* 8D */
1221 0x8000000080808000ull, /* 8E */
1222 0x8000000080808080ull, /* 8F */
1223 0x8000008000000000ull, /* 90 */
1224 0x8000008000000080ull, /* 91 */
1225 0x8000008000008000ull, /* 92 */
1226 0x8000008000008080ull, /* 93 */
1227 0x8000008000800000ull, /* 94 */
1228 0x8000008000800080ull, /* 95 */
1229 0x8000008000808000ull, /* 96 */
1230 0x8000008000808080ull, /* 97 */
1231 0x8000008080000000ull, /* 98 */
1232 0x8000008080000080ull, /* 99 */
1233 0x8000008080008000ull, /* 9A */
1234 0x8000008080008080ull, /* 9B */
1235 0x8000008080800000ull, /* 9C */
1236 0x8000008080800080ull, /* 9D */
1237 0x8000008080808000ull, /* 9E */
1238 0x8000008080808080ull, /* 9F */
1239 0x8000800000000000ull, /* A0 */
1240 0x8000800000000080ull, /* A1 */
1241 0x8000800000008000ull, /* A2 */
1242 0x8000800000008080ull, /* A3 */
1243 0x8000800000800000ull, /* A4 */
1244 0x8000800000800080ull, /* A5 */
1245 0x8000800000808000ull, /* A6 */
1246 0x8000800000808080ull, /* A7 */
1247 0x8000800080000000ull, /* A8 */
1248 0x8000800080000080ull, /* A9 */
1249 0x8000800080008000ull, /* AA */
1250 0x8000800080008080ull, /* AB */
1251 0x8000800080800000ull, /* AC */
1252 0x8000800080800080ull, /* AD */
1253 0x8000800080808000ull, /* AE */
1254 0x8000800080808080ull, /* AF */
1255 0x8000808000000000ull, /* B0 */
1256 0x8000808000000080ull, /* B1 */
1257 0x8000808000008000ull, /* B2 */
1258 0x8000808000008080ull, /* B3 */
1259 0x8000808000800000ull, /* B4 */
1260 0x8000808000800080ull, /* B5 */
1261 0x8000808000808000ull, /* B6 */
1262 0x8000808000808080ull, /* B7 */
1263 0x8000808080000000ull, /* B8 */
1264 0x8000808080000080ull, /* B9 */
1265 0x8000808080008000ull, /* BA */
1266 0x8000808080008080ull, /* BB */
1267 0x8000808080800000ull, /* BC */
1268 0x8000808080800080ull, /* BD */
1269 0x8000808080808000ull, /* BE */
1270 0x8000808080808080ull, /* BF */
1271 0x8080000000000000ull, /* C0 */
1272 0x8080000000000080ull, /* C1 */
1273 0x8080000000008000ull, /* C2 */
1274 0x8080000000008080ull, /* C3 */
1275 0x8080000000800000ull, /* C4 */
1276 0x8080000000800080ull, /* C5 */
1277 0x8080000000808000ull, /* C6 */
1278 0x8080000000808080ull, /* C7 */
1279 0x8080000080000000ull, /* C8 */
1280 0x8080000080000080ull, /* C9 */
1281 0x8080000080008000ull, /* CA */
1282 0x8080000080008080ull, /* CB */
1283 0x8080000080800000ull, /* CC */
1284 0x8080000080800080ull, /* CD */
1285 0x8080000080808000ull, /* CE */
1286 0x8080000080808080ull, /* CF */
1287 0x8080008000000000ull, /* D0 */
1288 0x8080008000000080ull, /* D1 */
1289 0x8080008000008000ull, /* D2 */
1290 0x8080008000008080ull, /* D3 */
1291 0x8080008000800000ull, /* D4 */
1292 0x8080008000800080ull, /* D5 */
1293 0x8080008000808000ull, /* D6 */
1294 0x8080008000808080ull, /* D7 */
1295 0x8080008080000000ull, /* D8 */
1296 0x8080008080000080ull, /* D9 */
1297 0x8080008080008000ull, /* DA */
1298 0x8080008080008080ull, /* DB */
1299 0x8080008080800000ull, /* DC */
1300 0x8080008080800080ull, /* DD */
1301 0x8080008080808000ull, /* DE */
1302 0x8080008080808080ull, /* DF */
1303 0x8080800000000000ull, /* E0 */
1304 0x8080800000000080ull, /* E1 */
1305 0x8080800000008000ull, /* E2 */
1306 0x8080800000008080ull, /* E3 */
1307 0x8080800000800000ull, /* E4 */
1308 0x8080800000800080ull, /* E5 */
1309 0x8080800000808000ull, /* E6 */
1310 0x8080800000808080ull, /* E7 */
1311 0x8080800080000000ull, /* E8 */
1312 0x8080800080000080ull, /* E9 */
1313 0x8080800080008000ull, /* EA */
1314 0x8080800080008080ull, /* EB */
1315 0x8080800080800000ull, /* EC */
1316 0x8080800080800080ull, /* ED */
1317 0x8080800080808000ull, /* EE */
1318 0x8080800080808080ull, /* EF */
1319 0x8080808000000000ull, /* F0 */
1320 0x8080808000000080ull, /* F1 */
1321 0x8080808000008000ull, /* F2 */
1322 0x8080808000008080ull, /* F3 */
1323 0x8080808000800000ull, /* F4 */
1324 0x8080808000800080ull, /* F5 */
1325 0x8080808000808000ull, /* F6 */
1326 0x8080808000808080ull, /* F7 */
1327 0x8080808080000000ull, /* F8 */
1328 0x8080808080000080ull, /* F9 */
1329 0x8080808080008000ull, /* FA */
1330 0x8080808080008080ull, /* FB */
1331 0x8080808080800000ull, /* FC */
1332 0x8080808080800080ull, /* FD */
1333 0x8080808080808000ull, /* FE */
1334 0x8080808080808080ull, /* FF */
1335 };
1336
1337 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1338 {
1339 int i;
1340 uint64_t t[2] = { 0, 0 };
1341
1342 VECTOR_FOR_INORDER_I(i, u8) {
1343 #if defined(HOST_WORDS_BIGENDIAN)
1344 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1345 #else
1346 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1347 #endif
1348 }
1349
1350 r->u64[0] = t[0];
1351 r->u64[1] = t[1];
1352 }
1353
1354 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1355 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1356 { \
1357 int i, j; \
1358 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1359 \
1360 VECTOR_FOR_INORDER_I(i, srcfld) { \
1361 prod[i] = 0; \
1362 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1363 if (a->srcfld[i] & (1ull<<j)) { \
1364 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1365 } \
1366 } \
1367 } \
1368 \
1369 VECTOR_FOR_INORDER_I(i, trgfld) { \
1370 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1371 } \
1372 }
1373
1374 PMSUM(vpmsumb, u8, u16, uint16_t)
1375 PMSUM(vpmsumh, u16, u32, uint32_t)
1376 PMSUM(vpmsumw, u32, u64, uint64_t)
1377
1378 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1379 {
1380
1381 #ifdef CONFIG_INT128
1382 int i, j;
1383 __uint128_t prod[2];
1384
1385 VECTOR_FOR_INORDER_I(i, u64) {
1386 prod[i] = 0;
1387 for (j = 0; j < 64; j++) {
1388 if (a->u64[i] & (1ull<<j)) {
1389 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1390 }
1391 }
1392 }
1393
1394 r->u128 = prod[0] ^ prod[1];
1395
1396 #else
1397 int i, j;
1398 ppc_avr_t prod[2];
1399
1400 VECTOR_FOR_INORDER_I(i, u64) {
1401 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1402 for (j = 0; j < 64; j++) {
1403 if (a->u64[i] & (1ull<<j)) {
1404 ppc_avr_t bshift;
1405 if (j == 0) {
1406 bshift.u64[HI_IDX] = 0;
1407 bshift.u64[LO_IDX] = b->u64[i];
1408 } else {
1409 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1410 bshift.u64[LO_IDX] = b->u64[i] << j;
1411 }
1412 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1413 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1414 }
1415 }
1416 }
1417
1418 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1419 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1420 #endif
1421 }
1422
1423
/* PKBIG is 1 when HOST_WORDS_BIGENDIAN is defined and 0 otherwise. */
#ifdef HOST_WORDS_BIGENDIAN
#define PKBIG 1
#else
#define PKBIG 0
#endif
1429 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1430 {
1431 int i, j;
1432 ppc_avr_t result;
1433 #if defined(HOST_WORDS_BIGENDIAN)
1434 const ppc_avr_t *x[2] = { a, b };
1435 #else
1436 const ppc_avr_t *x[2] = { b, a };
1437 #endif
1438
1439 VECTOR_FOR_INORDER_I(i, u64) {
1440 VECTOR_FOR_INORDER_I(j, u32) {
1441 uint32_t e = x[i]->u32[j];
1442
1443 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1444 ((e >> 6) & 0x3e0) |
1445 ((e >> 3) & 0x1f));
1446 }
1447 }
1448 *r = result;
1449 }
1450
1451 #define VPK(suffix, from, to, cvt, dosat) \
1452 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1453 ppc_avr_t *a, ppc_avr_t *b) \
1454 { \
1455 int i; \
1456 int sat = 0; \
1457 ppc_avr_t result; \
1458 ppc_avr_t *a0 = PKBIG ? a : b; \
1459 ppc_avr_t *a1 = PKBIG ? b : a; \
1460 \
1461 VECTOR_FOR_INORDER_I(i, from) { \
1462 result.to[i] = cvt(a0->from[i], &sat); \
1463 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1464 } \
1465 *r = result; \
1466 if (dosat && sat) { \
1467 env->vscr |= (1 << VSCR_SAT); \
1468 } \
1469 }
1470 #define I(x, y) (x)
1471 VPK(shss, s16, s8, cvtshsb, 1)
1472 VPK(shus, s16, u8, cvtshub, 1)
1473 VPK(swss, s32, s16, cvtswsh, 1)
1474 VPK(swus, s32, u16, cvtswuh, 1)
1475 VPK(sdss, s64, s32, cvtsdsw, 1)
1476 VPK(sdus, s64, u32, cvtsduw, 1)
1477 VPK(uhus, u16, u8, cvtuhub, 1)
1478 VPK(uwus, u32, u16, cvtuwuh, 1)
1479 VPK(udus, u64, u32, cvtuduw, 1)
1480 VPK(uhum, u16, u8, I, 0)
1481 VPK(uwum, u32, u16, I, 0)
1482 VPK(udum, u64, u32, I, 0)
1483 #undef I
1484 #undef VPK
1485 #undef PKBIG
1486
1487 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1488 {
1489 int i;
1490
1491 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1492 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1493 }
1494 }
1495
1496 #define VRFI(suffix, rounding) \
1497 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1498 ppc_avr_t *b) \
1499 { \
1500 int i; \
1501 float_status s = env->vec_status; \
1502 \
1503 set_float_rounding_mode(rounding, &s); \
1504 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1505 r->f[i] = float32_round_to_int (b->f[i], &s); \
1506 } \
1507 }
1508 VRFI(n, float_round_nearest_even)
1509 VRFI(m, float_round_down)
1510 VRFI(p, float_round_up)
1511 VRFI(z, float_round_to_zero)
1512 #undef VRFI
1513
1514 #define VROTATE(suffix, element, mask) \
1515 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1516 { \
1517 int i; \
1518 \
1519 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1520 unsigned int shift = b->element[i] & mask; \
1521 r->element[i] = (a->element[i] << shift) | \
1522 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1523 } \
1524 }
1525 VROTATE(b, u8, 0x7)
1526 VROTATE(h, u16, 0xF)
1527 VROTATE(w, u32, 0x1F)
1528 VROTATE(d, u64, 0x3F)
1529 #undef VROTATE
1530
1531 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1532 {
1533 int i;
1534
1535 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1536 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1537
1538 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1539 }
1540 }
1541
1542 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1543 ppc_avr_t *c)
1544 {
1545 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1546 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1547 }
1548
1549 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1550 {
1551 int i;
1552
1553 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1554 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1555 }
1556 }
1557
1558 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1559 {
1560 int i;
1561
1562 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1563 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1564 }
1565 }
1566
1567 #if defined(HOST_WORDS_BIGENDIAN)
1568 #define LEFT 0
1569 #define RIGHT 1
1570 #else
1571 #define LEFT 1
1572 #define RIGHT 0
1573 #endif
1574 /* The specification says that the results are undefined if all of the
1575 * shift counts are not identical. We check to make sure that they are
1576 * to conform to what real hardware appears to do. */
1577 #define VSHIFT(suffix, leftp) \
1578 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1579 { \
1580 int shift = b->u8[LO_IDX*15] & 0x7; \
1581 int doit = 1; \
1582 int i; \
1583 \
1584 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1585 doit = doit && ((b->u8[i] & 0x7) == shift); \
1586 } \
1587 if (doit) { \
1588 if (shift == 0) { \
1589 *r = *a; \
1590 } else if (leftp) { \
1591 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1592 \
1593 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1594 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1595 } else { \
1596 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1597 \
1598 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1599 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1600 } \
1601 } \
1602 }
1603 VSHIFT(l, LEFT)
1604 VSHIFT(r, RIGHT)
1605 #undef VSHIFT
1606 #undef LEFT
1607 #undef RIGHT
1608
1609 #define VSL(suffix, element, mask) \
1610 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1611 { \
1612 int i; \
1613 \
1614 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1615 unsigned int shift = b->element[i] & mask; \
1616 \
1617 r->element[i] = a->element[i] << shift; \
1618 } \
1619 }
1620 VSL(b, u8, 0x7)
1621 VSL(h, u16, 0x0F)
1622 VSL(w, u32, 0x1F)
1623 VSL(d, u64, 0x3F)
1624 #undef VSL
1625
1626 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1627 {
1628 int sh = shift & 0xf;
1629 int i;
1630 ppc_avr_t result;
1631
1632 #if defined(HOST_WORDS_BIGENDIAN)
1633 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1634 int index = sh + i;
1635 if (index > 0xf) {
1636 result.u8[i] = b->u8[index - 0x10];
1637 } else {
1638 result.u8[i] = a->u8[index];
1639 }
1640 }
1641 #else
1642 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1643 int index = (16 - sh) + i;
1644 if (index > 0xf) {
1645 result.u8[i] = a->u8[index - 0x10];
1646 } else {
1647 result.u8[i] = b->u8[index];
1648 }
1649 }
1650 #endif
1651 *r = result;
1652 }
1653
1654 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1655 {
1656 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1657
1658 #if defined(HOST_WORDS_BIGENDIAN)
1659 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1660 memset(&r->u8[16-sh], 0, sh);
1661 #else
1662 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1663 memset(&r->u8[0], 0, sh);
1664 #endif
1665 }
1666
1667 /* Experimental testing shows that hardware masks the immediate. */
1668 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1669 #if defined(HOST_WORDS_BIGENDIAN)
1670 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1671 #else
1672 #define SPLAT_ELEMENT(element) \
1673 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1674 #endif
1675 #define VSPLT(suffix, element) \
1676 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1677 { \
1678 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1679 int i; \
1680 \
1681 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1682 r->element[i] = s; \
1683 } \
1684 }
1685 VSPLT(b, u8)
1686 VSPLT(h, u16)
1687 VSPLT(w, u32)
1688 #undef VSPLT
1689 #undef SPLAT_ELEMENT
1690 #undef _SPLAT_MASKED
1691
1692 #define VSPLTI(suffix, element, splat_type) \
1693 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1694 { \
1695 splat_type x = (int8_t)(splat << 3) >> 3; \
1696 int i; \
1697 \
1698 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1699 r->element[i] = x; \
1700 } \
1701 }
1702 VSPLTI(b, s8, int8_t)
1703 VSPLTI(h, s16, int16_t)
1704 VSPLTI(w, s32, int32_t)
1705 #undef VSPLTI
1706
1707 #define VSR(suffix, element, mask) \
1708 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1709 { \
1710 int i; \
1711 \
1712 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1713 unsigned int shift = b->element[i] & mask; \
1714 r->element[i] = a->element[i] >> shift; \
1715 } \
1716 }
1717 VSR(ab, s8, 0x7)
1718 VSR(ah, s16, 0xF)
1719 VSR(aw, s32, 0x1F)
1720 VSR(ad, s64, 0x3F)
1721 VSR(b, u8, 0x7)
1722 VSR(h, u16, 0xF)
1723 VSR(w, u32, 0x1F)
1724 VSR(d, u64, 0x3F)
1725 #undef VSR
1726
1727 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1728 {
1729 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1730
1731 #if defined(HOST_WORDS_BIGENDIAN)
1732 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1733 memset(&r->u8[0], 0, sh);
1734 #else
1735 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1736 memset(&r->u8[16 - sh], 0, sh);
1737 #endif
1738 }
1739
1740 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1741 {
1742 int i;
1743
1744 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1745 r->u32[i] = a->u32[i] >= b->u32[i];
1746 }
1747 }
1748
1749 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1750 {
1751 int64_t t;
1752 int i, upper;
1753 ppc_avr_t result;
1754 int sat = 0;
1755
1756 #if defined(HOST_WORDS_BIGENDIAN)
1757 upper = ARRAY_SIZE(r->s32)-1;
1758 #else
1759 upper = 0;
1760 #endif
1761 t = (int64_t)b->s32[upper];
1762 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1763 t += a->s32[i];
1764 result.s32[i] = 0;
1765 }
1766 result.s32[upper] = cvtsdsw(t, &sat);
1767 *r = result;
1768
1769 if (sat) {
1770 env->vscr |= (1 << VSCR_SAT);
1771 }
1772 }
1773
1774 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1775 {
1776 int i, j, upper;
1777 ppc_avr_t result;
1778 int sat = 0;
1779
1780 #if defined(HOST_WORDS_BIGENDIAN)
1781 upper = 1;
1782 #else
1783 upper = 0;
1784 #endif
1785 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1786 int64_t t = (int64_t)b->s32[upper + i * 2];
1787
1788 result.u64[i] = 0;
1789 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1790 t += a->s32[2 * i + j];
1791 }
1792 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1793 }
1794
1795 *r = result;
1796 if (sat) {
1797 env->vscr |= (1 << VSCR_SAT);
1798 }
1799 }
1800
1801 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802 {
1803 int i, j;
1804 int sat = 0;
1805
1806 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807 int64_t t = (int64_t)b->s32[i];
1808
1809 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1810 t += a->s8[4 * i + j];
1811 }
1812 r->s32[i] = cvtsdsw(t, &sat);
1813 }
1814
1815 if (sat) {
1816 env->vscr |= (1 << VSCR_SAT);
1817 }
1818 }
1819
1820 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1821 {
1822 int sat = 0;
1823 int i;
1824
1825 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1826 int64_t t = (int64_t)b->s32[i];
1827
1828 t += a->s16[2 * i] + a->s16[2 * i + 1];
1829 r->s32[i] = cvtsdsw(t, &sat);
1830 }
1831
1832 if (sat) {
1833 env->vscr |= (1 << VSCR_SAT);
1834 }
1835 }
1836
1837 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1838 {
1839 int i, j;
1840 int sat = 0;
1841
1842 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1843 uint64_t t = (uint64_t)b->u32[i];
1844
1845 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1846 t += a->u8[4 * i + j];
1847 }
1848 r->u32[i] = cvtuduw(t, &sat);
1849 }
1850
1851 if (sat) {
1852 env->vscr |= (1 << VSCR_SAT);
1853 }
1854 }
1855
/*
 * UPKHI / UPKLO are complementary 0/1 flags: UPKHI is 1 on big-endian
 * hosts, UPKLO is 1 on all others.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
1863 #define VUPKPX(suffix, hi) \
1864 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1865 { \
1866 int i; \
1867 ppc_avr_t result; \
1868 \
1869 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1870 uint16_t e = b->u16[hi ? i : i+4]; \
1871 uint8_t a = (e >> 15) ? 0xff : 0; \
1872 uint8_t r = (e >> 10) & 0x1f; \
1873 uint8_t g = (e >> 5) & 0x1f; \
1874 uint8_t b = e & 0x1f; \
1875 \
1876 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1877 } \
1878 *r = result; \
1879 }
1880 VUPKPX(lpx, UPKLO)
1881 VUPKPX(hpx, UPKHI)
1882 #undef VUPKPX
1883
1884 #define VUPK(suffix, unpacked, packee, hi) \
1885 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1886 { \
1887 int i; \
1888 ppc_avr_t result; \
1889 \
1890 if (hi) { \
1891 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1892 result.unpacked[i] = b->packee[i]; \
1893 } \
1894 } else { \
1895 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1896 i++) { \
1897 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1898 } \
1899 } \
1900 *r = result; \
1901 }
1902 VUPK(hsb, s16, s8, UPKHI)
1903 VUPK(hsh, s32, s16, UPKHI)
1904 VUPK(hsw, s64, s32, UPKHI)
1905 VUPK(lsb, s16, s8, UPKLO)
1906 VUPK(lsh, s32, s16, UPKLO)
1907 VUPK(lsw, s64, s32, UPKLO)
1908 #undef VUPK
1909 #undef UPKHI
1910 #undef UPKLO
1911
1912 #define VGENERIC_DO(name, element) \
1913 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1914 { \
1915 int i; \
1916 \
1917 VECTOR_FOR_INORDER_I(i, element) { \
1918 r->element[i] = name(b->element[i]); \
1919 } \
1920 }
1921
1922 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1923 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1924 #define clzw(v) clz32((v))
1925 #define clzd(v) clz64((v))
1926
1927 VGENERIC_DO(clzb, u8)
1928 VGENERIC_DO(clzh, u16)
1929 VGENERIC_DO(clzw, u32)
1930 VGENERIC_DO(clzd, u64)
1931
1932 #undef clzb
1933 #undef clzh
1934 #undef clzw
1935 #undef clzd
1936
1937 #define popcntb(v) ctpop8(v)
1938 #define popcnth(v) ctpop16(v)
1939 #define popcntw(v) ctpop32(v)
1940 #define popcntd(v) ctpop64(v)
1941
1942 VGENERIC_DO(popcntb, u8)
1943 VGENERIC_DO(popcnth, u16)
1944 VGENERIC_DO(popcntw, u32)
1945 VGENERIC_DO(popcntd, u64)
1946
1947 #undef popcntb
1948 #undef popcnth
1949 #undef popcntw
1950 #undef popcntd
1951
1952 #undef VGENERIC_DO
1953
/*
 * Initializer for a quadword holding the value 1: the non-zero doubleword
 * is u64[1] on big-endian hosts and u64[0] otherwise.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
1959
1960 #ifndef CONFIG_INT128
1961
1962 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1963 {
1964 t->u64[0] = ~a.u64[0];
1965 t->u64[1] = ~a.u64[1];
1966 }
1967
1968 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1969 {
1970 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1971 return -1;
1972 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1973 return 1;
1974 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1975 return -1;
1976 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1977 return 1;
1978 } else {
1979 return 0;
1980 }
1981 }
1982
1983 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1984 {
1985 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1986 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1987 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1988 }
1989
1990 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1991 {
1992 ppc_avr_t not_a;
1993 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1994 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1995 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1996 avr_qw_not(&not_a, a);
1997 return avr_qw_cmpu(not_a, b) < 0;
1998 }
1999
2000 #endif
2001
2002 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2003 {
2004 #ifdef CONFIG_INT128
2005 r->u128 = a->u128 + b->u128;
2006 #else
2007 avr_qw_add(r, *a, *b);
2008 #endif
2009 }
2010
2011 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2012 {
2013 #ifdef CONFIG_INT128
2014 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2015 #else
2016
2017 if (c->u64[LO_IDX] & 1) {
2018 ppc_avr_t tmp;
2019
2020 tmp.u64[HI_IDX] = 0;
2021 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2022 avr_qw_add(&tmp, *a, tmp);
2023 avr_qw_add(r, tmp, *b);
2024 } else {
2025 avr_qw_add(r, *a, *b);
2026 }
2027 #endif
2028 }
2029
2030 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2031 {
2032 #ifdef CONFIG_INT128
2033 r->u128 = (~a->u128 < b->u128);
2034 #else
2035 ppc_avr_t not_a;
2036
2037 avr_qw_not(&not_a, *a);
2038
2039 r->u64[HI_IDX] = 0;
2040 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2041 #endif
2042 }
2043
2044 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2045 {
2046 #ifdef CONFIG_INT128
2047 int carry_out = (~a->u128 < b->u128);
2048 if (!carry_out && (c->u128 & 1)) {
2049 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2050 ((a->u128 != 0) || (b->u128 != 0));
2051 }
2052 r->u128 = carry_out;
2053 #else
2054
2055 int carry_in = c->u64[LO_IDX] & 1;
2056 int carry_out = 0;
2057 ppc_avr_t tmp;
2058
2059 carry_out = avr_qw_addc(&tmp, *a, *b);
2060
2061 if (!carry_out && carry_in) {
2062 ppc_avr_t one = QW_ONE;
2063 carry_out = avr_qw_addc(&tmp, tmp, one);
2064 }
2065 r->u64[HI_IDX] = 0;
2066 r->u64[LO_IDX] = carry_out;
2067 #endif
2068 }
2069
2070 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2071 {
2072 #ifdef CONFIG_INT128
2073 r->u128 = a->u128 - b->u128;
2074 #else
2075 ppc_avr_t tmp;
2076 ppc_avr_t one = QW_ONE;
2077
2078 avr_qw_not(&tmp, *b);
2079 avr_qw_add(&tmp, *a, tmp);
2080 avr_qw_add(r, tmp, one);
2081 #endif
2082 }
2083
2084 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2085 {
2086 #ifdef CONFIG_INT128
2087 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2088 #else
2089 ppc_avr_t tmp, sum;
2090
2091 avr_qw_not(&tmp, *b);
2092 avr_qw_add(&sum, *a, tmp);
2093
2094 tmp.u64[HI_IDX] = 0;
2095 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2096 avr_qw_add(r, sum, tmp);
2097 #endif
2098 }
2099
2100 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2101 {
2102 #ifdef CONFIG_INT128
2103 r->u128 = (~a->u128 < ~b->u128) ||
2104 (a->u128 + ~b->u128 == (__uint128_t)-1);
2105 #else
2106 int carry = (avr_qw_cmpu(*a, *b) > 0);
2107 if (!carry) {
2108 ppc_avr_t tmp;
2109 avr_qw_not(&tmp, *b);
2110 avr_qw_add(&tmp, *a, tmp);
2111 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2112 }
2113 r->u64[HI_IDX] = 0;
2114 r->u64[LO_IDX] = carry;
2115 #endif
2116 }
2117
2118 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2119 {
2120 #ifdef CONFIG_INT128
2121 r->u128 =
2122 (~a->u128 < ~b->u128) ||
2123 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2124 #else
2125 int carry_in = c->u64[LO_IDX] & 1;
2126 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2127 if (!carry_out && carry_in) {
2128 ppc_avr_t tmp;
2129 avr_qw_not(&tmp, *b);
2130 avr_qw_add(&tmp, *a, tmp);
2131 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2132 }
2133
2134 r->u64[HI_IDX] = 0;
2135 r->u64[LO_IDX] = carry_out;
2136 #endif
2137 }
2138
/*
 * Sign nibble codes of a packed decimal value, listed in ascending
 * numeric order.  The PLUS codes denote a positive and the NEG codes a
 * negative value; "PREF" marks the codes produced for results, "ALT" the
 * additional codes accepted on input.
 */
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_PREF_1 0xC
#define BCD_NEG_PREF    0xD
#define BCD_PLUS_ALT_2  0xE
#define BCD_PLUS_PREF_2 0xF
2145
/*
 * BCD_DIG_BYTE(n) is the index into u8[] of the byte holding decimal digit
 * n (two digits per byte); the byte order is mirrored on big-endian hosts.
 * The argument is parenthesized so that expressions such as
 * BCD_DIG_BYTE(i + 1) are divided as a whole.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2151
2152 static int bcd_get_sgn(ppc_avr_t *bcd)
2153 {
2154 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2155 case BCD_PLUS_PREF_1:
2156 case BCD_PLUS_PREF_2:
2157 case BCD_PLUS_ALT_1:
2158 case BCD_PLUS_ALT_2:
2159 {
2160 return 1;
2161 }
2162
2163 case BCD_NEG_PREF:
2164 case BCD_NEG_ALT:
2165 {
2166 return -1;
2167 }
2168
2169 default:
2170 {
2171 return 0;
2172 }
2173 }
2174 }
2175
2176 static int bcd_preferred_sgn(int sgn, int ps)
2177 {
2178 if (sgn >= 0) {
2179 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2180 } else {
2181 return BCD_NEG_PREF;
2182 }
2183 }
2184
2185 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2186 {
2187 uint8_t result;
2188 if (n & 1) {
2189 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2190 } else {
2191 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2192 }
2193
2194 if (unlikely(result > 9)) {
2195 *invalid = true;
2196 }
2197 return result;
2198 }
2199
2200 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2201 {
2202 if (n & 1) {
2203 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2204 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2205 } else {
2206 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2207 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2208 }
2209 }
2210
2211 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2212 {
2213 int i;
2214 int invalid = 0;
2215 for (i = 31; i > 0; i--) {
2216 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2217 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2218 if (unlikely(invalid)) {
2219 return 0; /* doesn't matter */
2220 } else if (dig_a > dig_b) {
2221 return 1;
2222 } else if (dig_a < dig_b) {
2223 return -1;
2224 }
2225 }
2226
2227 return 0;
2228 }
2229
2230 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2231 int *overflow)
2232 {
2233 int carry = 0;
2234 int i;
2235 int is_zero = 1;
2236 for (i = 1; i <= 31; i++) {
2237 uint8_t digit = bcd_get_digit(a, i, invalid) +
2238 bcd_get_digit(b, i, invalid) + carry;
2239 is_zero &= (digit == 0);
2240 if (digit > 9) {
2241 carry = 1;
2242 digit -= 10;
2243 } else {
2244 carry = 0;
2245 }
2246
2247 bcd_put_digit(t, digit, i);
2248
2249 if (unlikely(*invalid)) {
2250 return -1;
2251 }
2252 }
2253
2254 *overflow = carry;
2255 return is_zero;
2256 }
2257
2258 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2259 int *overflow)
2260 {
2261 int carry = 0;
2262 int i;
2263 int is_zero = 1;
2264 for (i = 1; i <= 31; i++) {
2265 uint8_t digit = bcd_get_digit(a, i, invalid) -
2266 bcd_get_digit(b, i, invalid) + carry;
2267 is_zero &= (digit == 0);
2268 if (digit & 0x80) {
2269 carry = -1;
2270 digit += 10;
2271 } else {
2272 carry = 0;
2273 }
2274
2275 bcd_put_digit(t, digit, i);
2276
2277 if (unlikely(*invalid)) {
2278 return -1;
2279 }
2280 }
2281
2282 *overflow = carry;
2283 return is_zero;
2284 }
2285
2286 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2287 {
2288
2289 int sgna = bcd_get_sgn(a);
2290 int sgnb = bcd_get_sgn(b);
2291 int invalid = (sgna == 0) || (sgnb == 0);
2292 int overflow = 0;
2293 int zero = 0;
2294 uint32_t cr = 0;
2295 ppc_avr_t result = { .u64 = { 0, 0 } };
2296
2297 if (!invalid) {
2298 if (sgna == sgnb) {
2299 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2300 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2301 cr = (sgna > 0) ? 4 : 8;
2302 } else if (bcd_cmp_mag(a, b) > 0) {
2303 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2304 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2305 cr = (sgna > 0) ? 4 : 8;
2306 } else {
2307 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2308 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2309 cr = (sgnb > 0) ? 4 : 8;
2310 }
2311 }
2312
2313 if (unlikely(invalid)) {
2314 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2315 cr = 1;
2316 } else if (overflow) {
2317 cr |= 1;
2318 } else if (zero) {
2319 cr = 2;
2320 }
2321
2322 *r = result;
2323
2324 return cr;
2325 }
2326
2327 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2328 {
2329 ppc_avr_t bcopy = *b;
2330 int sgnb = bcd_get_sgn(b);
2331 if (sgnb < 0) {
2332 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2333 } else if (sgnb > 0) {
2334 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2335 }
2336 /* else invalid ... defer to bcdadd code for proper handling */
2337
2338 return helper_bcdadd(r, a, &bcopy, ps);
2339 }
2340
/*
 * Byte substitution table used by SubBytes(); the values are those of the
 * AES (FIPS-197) forward S-box.  The table is only ever read, so it is
 * declared const.
 */
static const uint8_t SBOX[256] = {
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
    0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
    0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
    0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
    0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
    0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
    0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
    0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
    0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
    0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
    0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
    0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
    0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
    0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
    0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
    0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
    0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
};
2375
2376 static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
2377 {
2378 int i;
2379 VECTOR_FOR_INORDER_I(i, u8) {
2380 r->u8[i] = SBOX[a->u8[i]];
2381 }
2382 }
2383
/*
 * AES inverse substitution box (InvSBOX[SBOX[x]] == x), indexed by the
 * input byte.  The table is never written, so it is declared const.
 */
static const uint8_t InvSBOX[256] = {
    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
    0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
    0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
    0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
    0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
    0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
    0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
    0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
    0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
    0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
    0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
    0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
    0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
    0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
    0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
    0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
    0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
    0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
    0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
    0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
    0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
    0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
    0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
    0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
    0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
    0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
    0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
    0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
    0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
    0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
};
2418
2419 static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
2420 {
2421 int i;
2422 VECTOR_FOR_INORDER_I(i, u8) {
2423 r->u8[i] = InvSBOX[a->u8[i]];
2424 }
2425 }
2426
/* Rotate the 8-bit value x left by n bit positions. */
static uint8_t ROTL8(uint8_t x, int n)
{
    const int up = x << n;          /* bits moved towards the MSB */
    const int down = x >> (8 - n);  /* bits that wrap around to the bottom */

    /* the return conversion to uint8_t drops everything above bit 7 */
    return up | down;
}
2431
/*
 * Test one bit of a byte.  Bits are numbered from the most significant end:
 * n == 0 selects 0x80, n == 7 selects 0x01.  Returns 1 if set, else 0.
 */
static inline int BIT8(uint8_t x, int n)
{
    const int mask = 0x80 >> n;

    return (x & mask) ? 1 : 0;
}
2436
/*
 * Multiply x by 2 in the AES field.
 *
 * The rotate already places the old top bit in bit 0, so XOR-ing 0x1A on
 * top of it has the same effect as XOR-ing 0x1B into the plain left shift.
 */
static uint8_t GFx02(uint8_t x)
{
    uint8_t product = ROTL8(x, 1);

    if (BIT8(x, 0)) {
        product ^= 0x1A;
    }
    return product;
}
2441
/*
 * Multiply x by 3 in the AES field: x XOR (x times 2), with the doubling
 * done as a rotate plus a conditional 0x1A correction.
 */
static uint8_t GFx03(uint8_t x)
{
    uint8_t product = x ^ ROTL8(x, 1);

    if (BIT8(x, 0)) {
        product ^= 0x1A;
    }
    return product;
}
2446
/*
 * Multiply x by 9 in the AES field.
 *
 * x is combined with x rotated left by 3; the two correction masks, selected
 * by the three most significant bits of x, account for the bits that the
 * rotate wrapped around.
 */
static uint8_t GFx09(uint8_t x)
{
    const int top0 = BIT8(x, 0);    /* bit 0x80 of x */
    const int top1 = BIT8(x, 1);    /* bit 0x40 of x */
    const int top2 = BIT8(x, 2);    /* bit 0x20 of x */
    uint8_t fix_a = 0;
    uint8_t fix_b = 0;

    if (top0) {
        fix_a |= 0x68;
    }
    if (top1) {
        fix_a |= 0x14;
        fix_b |= 0x20;
    }
    if (top2) {
        fix_a |= 0x02;
        fix_b |= 0x18;
    }

    return x ^ ROTL8(x, 3) ^ fix_a ^ fix_b;
}
2455
/*
 * Multiply x by 0x0B in the AES field.
 *
 * Built from x, x rotated left by 1 and x shifted left by 3, plus correction
 * masks selected by the three most significant bits of x.
 */
static uint8_t GFx0B(uint8_t x)
{
    const int top0 = BIT8(x, 0);    /* bit 0x80 of x */
    const int top1 = BIT8(x, 1);    /* bit 0x40 of x */
    const int top2 = BIT8(x, 2);    /* bit 0x20 of x */
    uint8_t shifted3 = x << 3;      /* truncated to 8 bits */
    uint8_t fix_a = 0;
    uint8_t fix_b = 0;
    uint8_t fix_c = 0;

    if (top0) {
        shifted3 |= 0x06;
        fix_a |= 0x70;
    }
    if (top1) {
        fix_a |= 0x06;
        fix_b |= 0x30;
    }
    if (top2) {
        shifted3 |= 0x01;
        fix_a |= 0x08;
        fix_b |= 0x02;
        fix_c = 0x10;
    }

    return x ^ ROTL8(x, 1) ^ shifted3 ^ fix_a ^ fix_b ^ fix_c;
}
2467
/*
 * Multiply x by 0x0D in the AES field.
 *
 * Built from x, x rotated left by 2 and x shifted left by 3, plus correction
 * masks selected by the three most significant bits of x.
 */
static uint8_t GFx0D(uint8_t x)
{
    const int top0 = BIT8(x, 0);    /* bit 0x80 of x */
    const int top1 = BIT8(x, 1);    /* bit 0x40 of x */
    const int top2 = BIT8(x, 2);    /* bit 0x20 of x */
    uint8_t shifted3 = x << 3;      /* truncated to 8 bits */
    uint8_t fix_a = 0;
    uint8_t fix_b = 0;
    uint8_t fix_c = 0;

    if (top0) {
        fix_a |= 0x58;
    }
    if (top1) {
        shifted3 |= 0x04;
        fix_a |= 0x20;
        fix_b |= 0x08;
    }
    if (top2) {
        shifted3 |= 0x03;
        fix_b |= 0x10;
        fix_c = 0x08;
    }

    return x ^ ROTL8(x, 2) ^ shifted3 ^ fix_a ^ fix_b ^ fix_c;
}
2478
/*
 * Multiply x by 0x0E in the AES field.
 *
 * Built from x rotated left by 1, x shifted left by 2 and x shifted left by
 * 3, plus correction masks selected by the three most significant bits of x.
 */
static uint8_t GFx0E(uint8_t x)
{
    const int top0 = BIT8(x, 0);    /* bit 0x80 of x */
    const int top1 = BIT8(x, 1);    /* bit 0x40 of x */
    const int top2 = BIT8(x, 2);    /* bit 0x20 of x */
    uint8_t shifted2 = x << 2;      /* truncated to 8 bits */
    uint8_t shifted3 = x << 3;      /* truncated to 8 bits */
    uint8_t fix_a = 0;
    uint8_t fix_b = 0;

    if (top0) {
        fix_a |= 0x40;
    }
    if (top1) {
        shifted2 |= 0x01;
        shifted3 |= 0x04;
        fix_a |= 0x28;
    }
    if (top2) {
        shifted2 |= 0x02;
        shifted3 |= 0x01;
        fix_a |= 0x10;
        fix_b = 0x08;
    }

    return ROTL8(x, 1) ^ shifted2 ^ shifted3 ^ fix_a ^ fix_b;
}
2491
/*
 * MCB(x, i, b): lvalue for byte b of 32-bit column i of the 16-byte vector
 * *x.  On little-endian hosts the u8[] index is mirrored (15 - n), so the
 * same (i, b) pair names the same logical byte on either host.
 */
#if !defined(HOST_WORDS_BIGENDIAN)
#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
#else
#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
#endif
2497
2498 static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
2499 {
2500 int i;
2501 for (i = 0; i < 4; i++) {
2502 MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^
2503 MCB(x, i, 2) ^ MCB(x, i, 3);
2504 MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^
2505 GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3);
2506 MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^
2507 GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3));
2508 MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^
2509 MCB(x, i, 2) ^ GFx02(MCB(x, i, 3));
2510 }
2511 }
2512
2513 static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
2514 {
2515 int i;
2516 for (i = 0; i < 4; i++) {
2517 MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^
2518 GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3));
2519 MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^
2520 GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3));
2521 MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^
2522 GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3));
2523 MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^
2524 GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3));
2525 }
2526 }
2527
2528 static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
2529 {
2530 MCB(r, 0, 0) = MCB(x, 0, 0);
2531 MCB(r, 1, 0) = MCB(x, 1, 0);
2532 MCB(r, 2, 0) = MCB(x, 2, 0);
2533 MCB(r, 3, 0) = MCB(x, 3, 0);
2534
2535 MCB(r, 0, 1) = MCB(x, 1, 1);
2536 MCB(r, 1, 1) = MCB(x, 2, 1);
2537 MCB(r, 2, 1) = MCB(x, 3, 1);
2538 MCB(r, 3, 1) = MCB(x, 0, 1);
2539
2540 MCB(r, 0, 2) = MCB(x, 2, 2);
2541 MCB(r, 1, 2) = MCB(x, 3, 2);
2542 MCB(r, 2, 2) = MCB(x, 0, 2);
2543 MCB(r, 3, 2) = MCB(x, 1, 2);
2544
2545 MCB(r, 0, 3) = MCB(x, 3, 3);
2546 MCB(r, 1, 3) = MCB(x, 0, 3);
2547 MCB(r, 2, 3) = MCB(x, 1, 3);
2548 MCB(r, 3, 3) = MCB(x, 2, 3);
2549 }
2550
2551 static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
2552 {
2553 MCB(r, 0, 0) = MCB(x, 0, 0);
2554 MCB(r, 1, 0) = MCB(x, 1, 0);
2555 MCB(r, 2, 0) = MCB(x, 2, 0);
2556 MCB(r, 3, 0) = MCB(x, 3, 0);
2557
2558 MCB(r, 0, 1) = MCB(x, 3, 1);
2559 MCB(r, 1, 1) = MCB(x, 0, 1);
2560 MCB(r, 2, 1) = MCB(x, 1, 1);
2561 MCB(r, 3, 1) = MCB(x, 2, 1);
2562
2563 MCB(r, 0, 2) = MCB(x, 2, 2);
2564 MCB(r, 1, 2) = MCB(x, 3, 2);
2565 MCB(r, 2, 2) = MCB(x, 0, 2);
2566 MCB(r, 3, 2) = MCB(x, 1, 2);
2567
2568 MCB(r, 0, 3) = MCB(x, 1, 3);
2569 MCB(r, 1, 3) = MCB(x, 2, 3);
2570 MCB(r, 2, 3) = MCB(x, 3, 3);
2571 MCB(r, 3, 3) = MCB(x, 0, 3);
2572 }
2573
2574 #undef MCB
2575
/*
 * vsbox helper: stores SubBytes(*a) in *r, i.e. every byte of *a is
 * replaced by its SBOX[] entry.
 */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    SubBytes(r, a);
}
2580
2581 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2582 {
2583 ppc_avr_t vtemp1, vtemp2, vtemp3;
2584 SubBytes(&vtemp1, a);
2585 ShiftRows(&vtemp2, &vtemp1);
2586 MixColumns(&vtemp3, &vtemp2);
2587 r->u64[0] = vtemp3.u64[0] ^ b->u64[0];
2588 r->u64[1] = vtemp3.u64[1] ^ b->u64[1];
2589 }
2590
2591 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2592 {
2593 ppc_avr_t vtemp1, vtemp2;
2594 SubBytes(&vtemp1, a);
2595 ShiftRows(&vtemp2, &vtemp1);
2596 r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
2597 r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
2598 }
2599
2600 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2601 {
2602 /* This differs from what is written in ISA V2.07. The RTL is */
2603 /* incorrect and will be fixed in V2.07B. */
2604 ppc_avr_t vtemp1, vtemp2, vtemp3;
2605 InvShiftRows(&vtemp1, a);
2606 InvSubBytes(&vtemp2, &vtemp1);
2607 vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0];
2608 vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1];
2609 InvMixColumns(r, &vtemp3);
2610 }
2611
2612 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2613 {
2614 ppc_avr_t vtemp1, vtemp2;
2615 InvShiftRows(&vtemp1, a);
2616 InvSubBytes(&vtemp2, &vtemp1);
2617 r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
2618 r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
2619 }
2620
2621 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2622 #if defined(HOST_WORDS_BIGENDIAN)
2623 #define EL_IDX(i) (i)
2624 #else
2625 #define EL_IDX(i) (3 - (i))
2626 #endif
2627
2628 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2629 {
2630 int st = (st_six & 0x10) != 0;
2631 int six = st_six & 0xF;
2632 int i;
2633
2634 VECTOR_FOR_INORDER_I(i, u32) {
2635 if (st == 0) {
2636 if ((six & (0x8 >> i)) == 0) {
2637 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2638 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2639 (a->u32[EL_IDX(i)] >> 3);
2640 } else { /* six.bit[i] == 1 */
2641 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2642 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2643 (a->u32[EL_IDX(i)] >> 10);
2644 }
2645 } else { /* st == 1 */
2646 if ((six & (0x8 >> i)) == 0) {
2647 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2648 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2649 ROTRu32(a->u32[EL_IDX(i)], 22);
2650 } else { /* six.bit[i] == 1 */
2651 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2652 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2653 ROTRu32(a->u32[EL_IDX(i)], 25);
2654 }
2655 }
2656 }
2657 }
2658
2659 #undef ROTRu32
2660 #undef EL_IDX
2661
2662 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2663 #if defined(HOST_WORDS_BIGENDIAN)
2664 #define EL_IDX(i) (i)
2665 #else
2666 #define EL_IDX(i) (1 - (i))
2667 #endif
2668
2669 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2670 {
2671 int st = (st_six & 0x10) != 0;
2672 int six = st_six & 0xF;
2673 int i;
2674
2675 VECTOR_FOR_INORDER_I(i, u64) {
2676 if (st == 0) {
2677 if ((six & (0x8 >> (2*i))) == 0) {
2678 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2679 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2680 (a->u64[EL_IDX(i)] >> 7);
2681 } else { /* six.bit[2*i] == 1 */
2682 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2683 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2684 (a->u64[EL_IDX(i)] >> 6);
2685 }
2686 } else { /* st == 1 */
2687 if ((six & (0x8 >> (2*i))) == 0) {
2688 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2689 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2690 ROTRu64(a->u64[EL_IDX(i)], 39);
2691 } else { /* six.bit[2*i] == 1 */
2692 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2693 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2694 ROTRu64(a->u64[EL_IDX(i)], 41);
2695 }
2696 }
2697 }
2698 }
2699
2700 #undef ROTRu64
2701 #undef EL_IDX
2702
2703 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2704 {
2705 int i;
2706 VECTOR_FOR_INORDER_I(i, u8) {
2707 int indexA = c->u8[i] >> 4;
2708 int indexB = c->u8[i] & 0xF;
2709 #if defined(HOST_WORDS_BIGENDIAN)
2710 r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2711 #else
2712 r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2713 #endif
2714 }
2715 }
2716
2717 #undef VECTOR_FOR_INORDER_I
2718 #undef HI_IDX
2719 #undef LO_IDX
2720
2721 /*****************************************************************************/
2722 /* SPE extension helpers */
2723 /* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: swap the nibbles and reverse each one. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the byte order is swapped and
 * each byte is bit-reversed.
 *
 * Each reversed byte is widened to uint32_t before shifting; shifting the
 * int-promoted uint8_t left by 24 would overflow a signed int whenever the
 * byte has bit 7 set.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2739
2740 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2741 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2742 {
2743 uint32_t a, b, d, mask;
2744
2745 mask = UINT32_MAX >> (32 - MASKBITS);
2746 a = arg1 & mask;
2747 b = arg2 & mask;
2748 d = word_reverse(1 + word_reverse(a | ~b));
2749 return (arg1 & ~mask) | (d & b);
2750 }
2751
/*
 * Returns clz32() of val when bit 31 is clear and clz32() of ~val when
 * bit 31 is set, i.e. the input is inverted first when it starts with a
 * one bit.  clz32() is declared outside this section (presumably the
 * leading-zero count of a 32-bit value - confirm in host-utils).
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
2760
/*
 * Returns clz32(val).  clz32() is declared outside this section
 * (presumably the leading-zero count of a 32-bit value - confirm in
 * host-utils).
 */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
2765
2766 /* 440 specific */
2767 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2768 target_ulong low, uint32_t update_Rc)
2769 {
2770 target_ulong mask;
2771 int i;
2772
2773 i = 1;
2774 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2775 if ((high & mask) == 0) {
2776 if (update_Rc) {
2777 env->crf[0] = 0x4;
2778 }
2779 goto done;
2780 }
2781 i++;
2782 }
2783 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2784 if ((low & mask) == 0) {
2785 if (update_Rc) {
2786 env->crf[0] = 0x8;
2787 }
2788 goto done;
2789 }
2790 i++;
2791 }
2792 if (update_Rc) {
2793 env->crf[0] = 0x2;
2794 }
2795 done:
2796 env->xer = (env->xer & ~0x7F) | i;
2797 if (update_Rc) {
2798 env->crf[0] |= xer_so;
2799 }
2800 return i;
2801 }