block/nbd: split nbd_establish_connection out of nbd_client_connect
[qemu.git] / target / ppc / int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
3 *
4 * Copyright (c) 2003-2007 Jocelyn Mayer
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "exec/helper-proto.h"
26 #include "crypto/aes.h"
27 #include "fpu/softfloat.h"
28 #include "qapi/error.h"
29 #include "qemu/guest-random.h"
30
31 #include "helper_regs.h"
32 /*****************************************************************************/
33 /* Fixed point operations helpers */
34
35 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
36 {
37 if (unlikely(ov)) {
38 env->so = env->ov = 1;
39 } else {
40 env->ov = 0;
41 }
42 }
43
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
46 {
47 uint64_t rt = 0;
48 int overflow = 0;
49
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
52
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
58 }
59
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
62 }
63
64 if (oe) {
65 helper_update_ov_legacy(env, overflow);
66 }
67
68 return (target_ulong)rt;
69 }
70
71 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
72 uint32_t oe)
73 {
74 int64_t rt = 0;
75 int overflow = 0;
76
77 int64_t dividend = (int64_t)ra << 32;
78 int64_t divisor = (int64_t)((int32_t)rb);
79
80 if (unlikely((divisor == 0) ||
81 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
82 overflow = 1;
83 } else {
84 rt = dividend / divisor;
85 overflow = rt != (int32_t)rt;
86 }
87
88 if (unlikely(overflow)) {
89 rt = 0; /* Undefined */
90 }
91
92 if (oe) {
93 helper_update_ov_legacy(env, overflow);
94 }
95
96 return (target_ulong)rt;
97 }
98
99 #if defined(TARGET_PPC64)
100
101 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
102 {
103 uint64_t rt = 0;
104 int overflow = 0;
105
106 overflow = divu128(&rt, &ra, rb);
107
108 if (unlikely(overflow)) {
109 rt = 0; /* Undefined */
110 }
111
112 if (oe) {
113 helper_update_ov_legacy(env, overflow);
114 }
115
116 return rt;
117 }
118
119 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
120 {
121 int64_t rt = 0;
122 int64_t ra = (int64_t)rau;
123 int64_t rb = (int64_t)rbu;
124 int overflow = divs128(&rt, &ra, rb);
125
126 if (unlikely(overflow)) {
127 rt = 0; /* Undefined */
128 }
129
130 if (oe) {
131 helper_update_ov_legacy(env, overflow);
132 }
133
134 return rt;
135 }
136
137 #endif
138
139
140 #if defined(TARGET_PPC64)
/*
 * Replicate the low byte of x into every byte of a target_ulong,
 * e.g. for a 64-bit target_ulong, x = 0xab gives 0xabababababababab.
 */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 *
 * The result is non-zero when at least one byte of v is zero.
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))

/*
 * Compare the low byte of ra against every byte of rb.
 * Returns CRF_GT when at least one byte matches, 0 otherwise.
 */
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue
163
164 /*
165 * Return a random number.
166 */
167 uint64_t helper_darn32(void)
168 {
169 Error *err = NULL;
170 uint32_t ret;
171
172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
174 error_get_pretty(err));
175 error_free(err);
176 return -1;
177 }
178
179 return ret;
180 }
181
182 uint64_t helper_darn64(void)
183 {
184 Error *err = NULL;
185 uint64_t ret;
186
187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
189 error_get_pretty(err));
190 error_free(err);
191 return -1;
192 }
193
194 return ret;
195 }
196
/*
 * Bit permute doubleword: each of the eight bytes of rs is an index into
 * rb (via PPC_BIT).  Result bit n is set when byte n of rs is below 64
 * and the selected bit of rb is set; indices of 64 or more contribute 0.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t gathered = 0;
    int n;

    for (n = 0; n < 8; n++) {
        int sel = (rs >> (n * 8)) & 0xFF;

        if (sel >= 64) {
            continue;
        }
        if (rb & PPC_BIT(sel)) {
            gathered |= 1 << n;
        }
    }
    return gathered;
}
212
213 #endif
214
215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
216 {
217 target_ulong mask = 0xff;
218 target_ulong ra = 0;
219 int i;
220
221 for (i = 0; i < sizeof(target_ulong); i++) {
222 if ((rs & mask) == (rb & mask)) {
223 ra |= mask;
224 }
225 mask <<= 8;
226 }
227 return ra;
228 }
229
230 /* shift right arithmetic helper */
231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
233 {
234 int32_t ret;
235
236 if (likely(!(shift & 0x20))) {
237 if (likely((uint32_t)shift != 0)) {
238 shift &= 0x1f;
239 ret = (int32_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca32 = env->ca = 0;
242 } else {
243 env->ca32 = env->ca = 1;
244 }
245 } else {
246 ret = (int32_t)value;
247 env->ca32 = env->ca = 0;
248 }
249 } else {
250 ret = (int32_t)value >> 31;
251 env->ca32 = env->ca = (ret != 0);
252 }
253 return (target_long)ret;
254 }
255
256 #if defined(TARGET_PPC64)
257 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
258 target_ulong shift)
259 {
260 int64_t ret;
261
262 if (likely(!(shift & 0x40))) {
263 if (likely((uint64_t)shift != 0)) {
264 shift &= 0x3f;
265 ret = (int64_t)value >> shift;
266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
267 env->ca32 = env->ca = 0;
268 } else {
269 env->ca32 = env->ca = 1;
270 }
271 } else {
272 ret = (int64_t)value;
273 env->ca32 = env->ca = 0;
274 }
275 } else {
276 ret = (int64_t)value >> 63;
277 env->ca32 = env->ca = (ret != 0);
278 }
279 return ret;
280 }
281 #endif
282
283 #if defined(TARGET_PPC64)
284 target_ulong helper_popcntb(target_ulong val)
285 {
286 /* Note that we don't fold past bytes */
287 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
288 0x5555555555555555ULL);
289 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
290 0x3333333333333333ULL);
291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
292 0x0f0f0f0f0f0f0f0fULL);
293 return val;
294 }
295
296 target_ulong helper_popcntw(target_ulong val)
297 {
298 /* Note that we don't fold past words. */
299 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
300 0x5555555555555555ULL);
301 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
302 0x3333333333333333ULL);
303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
304 0x0f0f0f0f0f0f0f0fULL);
305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
306 0x00ff00ff00ff00ffULL);
307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
308 0x0000ffff0000ffffULL);
309 return val;
310 }
311 #else
312 target_ulong helper_popcntb(target_ulong val)
313 {
314 /* Note that we don't fold past bytes */
315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
318 return val;
319 }
320 #endif
321
322 /*****************************************************************************/
323 /* PowerPC 601 specific instructions (POWER bridge) */
/*
 * POWER "div": divide the 64-bit value built from arg1 (shifted into the
 * upper 32 bits) OR-ed with SPR_MQ by arg2.
 *
 * Guarded cases: the low 32 bits of arg2 are zero, or the low 32 bits of
 * the combined dividend equal INT32_MIN while the low word of arg2 is -1.
 * In those cases MQ is cleared and INT32_MIN is returned.  Otherwise MQ
 * receives the remainder and the quotient is returned.
 *
 * NOTE(review): tmp is an unsigned 64-bit value, so "tmp % arg2" is an
 * unsigned modulo by arg2 as a target_ulong, while "tmp / (int32_t)arg2"
 * divides by the sign-extended low word of arg2 converted to unsigned
 * 64-bit.  The guard also only inspects the low 32 bits of tmp.  Confirm
 * against the 601 documentation that this is the intended arithmetic.
 */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}
337
/*
 * POWER "divo": same division as helper_div, additionally recording
 * overflow in OV/SO.
 *
 * In the guarded cases (zero low word of arg2, or low 32 bits of the
 * dividend equal to INT32_MIN with arg2 == -1) OV and SO are set, MQ is
 * cleared and INT32_MIN is returned.  Otherwise MQ receives the remainder;
 * OV and SO are set when the quotient changes value when truncated to
 * int32_t, and OV is cleared when it does not (SO is left as is).
 *
 * NOTE(review): as in helper_div, the remainder is an unsigned modulo by
 * arg2 while the quotient divides by the sign-extended low word of arg2
 * converted to unsigned 64-bit - confirm this is intended.
 */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        /* Overflow when the quotient is not representable as int32_t. */
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}
359
360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
361 target_ulong arg2)
362 {
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
369 return (int32_t)arg1 / (int32_t)arg2;
370 }
371 }
372
373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
374 target_ulong arg2)
375 {
376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
377 (int32_t)arg2 == 0) {
378 env->so = env->ov = 1;
379 env->spr[SPR_MQ] = 0;
380 return INT32_MIN;
381 } else {
382 env->ov = 0;
383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
384 return (int32_t)arg1 / (int32_t)arg2;
385 }
386 }
387
388 /*****************************************************************************/
389 /* 602 specific instructions */
390 /* mfrom is the most crazy instruction ever seen, imho ! */
391 /* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *   return 256 * log10(10 ^ (-arg / 256) + 1.0) + 0.5
 * ("^" denotes exponentiation here, not XOR)
 */
397 #if !defined(CONFIG_USER_ONLY)
/*
 * 602 "mfrom": table lookup.
 *
 * Arguments below 602 index mfrom_ROM_table, which is brought in by the
 * textual #include inside the function body; any other argument yields 0.
 * NOTE(review): the bound 602 presumably matches the number of entries in
 * mfrom_table.inc.c - confirm against that file.
 */
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
407 #endif
408
409 /*****************************************************************************/
410 /* Altivec extension helpers */
/*
 * Loop header that walks the elements of r->element: ascending host
 * indices on big-endian hosts, descending on little-endian hosts.
 *
 * Requirements visible from the expansion: a variable named "r" must be
 * in scope, and "index" must be a signed integer (the little-endian form
 * terminates on index >= 0).
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif
418
/*
 * Saturating arithmetic helpers.
 *
 * SATCVT(from, to, from_type, to_type, min, max) defines
 * cvt<from><to>(x, sat), which clamps a from_type value into [min, max]
 * and returns it as to_type.  *sat is set to 1 when clamping occurs and
 * is left untouched otherwise, so a caller can accumulate it over a loop.
 *
 * SATCVTU is the variant for unsigned sources: only the upper bound is
 * checked and its "min" argument is unused.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* signed -> narrower unsigned (negative inputs clamp to 0) */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
461
462 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
463 {
464 env->vscr = vscr & ~(1u << VSCR_SAT);
465 /* Which bit we set is completely arbitrary, but clear the rest. */
466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
467 env->vscr_sat.u64[1] = 0;
468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
469 }
470
471 uint32_t helper_mfvscr(CPUPPCState *env)
472 {
473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
474 return env->vscr | (sat << VSCR_SAT);
475 }
476
/*
 * Mark saturation as having occurred.  helper_mfvscr() reports SAT when
 * any bit of env->vscr_sat is set, so a single non-zero word is enough.
 */
static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary. */
    env->vscr_sat.u32[0] = 1;
}
482
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
484 {
485 int i;
486
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
489 }
490 }
491
492 /* vprtybw */
493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
494 {
495 int i;
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
498 res ^= res >> 8;
499 r->u32[i] = res & 1;
500 }
501 }
502
503 /* vprtybd */
504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
505 {
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
509 res ^= res >> 16;
510 res ^= res >> 8;
511 r->u64[i] = res & 1;
512 }
513 }
514
515 /* vprtybq */
516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
517 {
518 uint64_t res = b->u64[0] ^ b->u64[1];
519 res ^= res >> 32;
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->VsrD(1) = res & 1;
523 r->VsrD(0) = 0;
524 }
525
/*
 * VARITH_DO(name, op, element) defines helper_v<name>(r, a, b), which
 * applies the binary operator "op" element by element over the "element"
 * view of the vectors.
 */
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
/* vmuluwm: multiply unsigned words, keeping the low 32 bits */
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
/* NOTE(review): no VARITH macro is defined in the visible part of the file */
#undef VARITH
538
/*
 * VARITHFP(suffix, func) defines helper_v<suffix>(env, r, a, b), which
 * applies the two-operand float32 function "func" to each f32 element,
 * using env->vec_status as the float status.
 */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
554
/*
 * VARITHFPFMA(suffix, type) defines helper_v<suffix>(env, r, a, b, c),
 * a per-element fused multiply-add float32_muladd(a, c, b, type, ...):
 * a and c are the multiplicands and b is the addend.  "type" carries the
 * float_muladd_* flags passed through to float32_muladd().
 */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA
568
/*
 * Saturating vector add/subtract.
 *
 * VARITHSAT_CASE computes one element in the wider type "type" and
 * narrows it with the saturating converter "cvt"; it expects "a", "b",
 * "r", "i" and "sat" to be in scope at the expansion site.
 *
 * VARITHSAT_DO defines helper_v<name>(r, vscr_sat, a, b, desc).  When any
 * element saturated, word 0 of *vscr_sat is set to 1.  The "desc"
 * parameter is not used by the body.
 *
 * NOTE(review): the unsigned variants compute in an unsigned optype but
 * narrow through converters taking a signed argument (cvtshub, cvtswuh,
 * cvtsduw); a wrapped-around difference is then presumably seen as a
 * negative value and clamps to 0 - this relies on the implementation's
 * unsigned-to-signed conversion.
 */
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
605
/*
 * VAVG_DO(name, element, etype) defines helper_v<name>(r, a, b), the
 * per-element rounded average (a + b + 1) >> 1.  The sum is formed in the
 * wider type "etype" so that it cannot overflow the element width.
 *
 * VAVG instantiates the signed (avgs*) and unsigned (avgu*) variant for
 * one element size.
 */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
626
/*
 * VABSDU_DO(name, element) defines helper_v<name>(r, a, b), which stores
 * |a - b| per element.  The larger operand is always the minuend, so the
 * subtraction never wraps for the unsigned element types used below.
 */
#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   type    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU
651
/*
 * VCF(suffix, cvt, element) defines helper_vcf<suffix>(env, r, b, uim):
 * each integer element of b is converted to float32 with "cvt" and then
 * scaled by 2^-uim via float32_scalbn().
 */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
666
/*
 * Integer vector compares.
 *
 * VCMP_DO(suffix, compare, element, record) defines
 * helper_vcmp<suffix>(env, r, a, b).  Each result element is all ones
 * when "a compare b" holds and zero otherwise; the switch on the element
 * size stores the result through the unsigned view of matching width.
 *
 * When "record" is non-zero, env->crf[6] gets bit 3 set if every element
 * compared true and bit 1 set if no element compared true.
 *
 * VCMP instantiates both the plain and the "_dot" (recording) helper.
 */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
717
/*
 * VCMPNE_DO(suffix, element, etype, cmpzero, record) defines
 * helper_vcmpne<suffix>(env, r, a, b).
 *
 * Each result element is all ones when a != b.  With "cmpzero" set, an
 * element is also all ones when either a or b is zero.  When "record" is
 * non-zero, env->crf[6] gets bit 3 set if every element is all ones and
 * bit 1 set if every element is zero.
 */
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/*
 * VCMPNE - Vector compare not equal (optionally "or zero")
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word;
 *             a leading z selects the compare-with-zero form)
 *   element - element type to access from vector
 *   etype   - C type of one element
 *   cmpzero - non-zero to also flag elements where a or b is zero
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE
761
/*
 * Floating-point vector compares.
 *
 * VCMPFP_DO(suffix, compare, order, record) defines
 * helper_vcmp<suffix>(env, r, a, b).  Each pair of f32 elements is
 * compared with float32_compare_quiet(); an unordered result always
 * yields 0, otherwise the element is all ones when "rel compare order"
 * holds.  When "record" is non-zero, env->crf[6] gets bit 3 set if every
 * element is all ones and bit 1 set if every element is zero.
 *
 * Note that "greater or equal" is expressed below as "not less", which
 * is valid because the unordered case has already been excluded.
 */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
799
800 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
801 ppc_avr_t *a, ppc_avr_t *b, int record)
802 {
803 int i;
804 int all_in = 0;
805
806 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
807 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
808 &env->vec_status);
809 if (le_rel == float_relation_unordered) {
810 r->u32[i] = 0xc0000000;
811 all_in = 1;
812 } else {
813 float32 bneg = float32_chs(b->f32[i]);
814 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
815 &env->vec_status);
816 int le = le_rel != float_relation_greater;
817 int ge = ge_rel != float_relation_less;
818
819 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
820 all_in |= (!le | !ge);
821 }
822 }
823 if (record) {
824 env->crf[6] = (all_in == 0) << 1;
825 }
826 }
827
/* vcmpbfp: bounds compare without updating env->crf[6] (record = 0). */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}
832
/* vcmpbfp.: bounds compare that also updates env->crf[6] (record = 1). */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
838
/*
 * VCT(suffix, satcvt, element) defines helper_vct<suffix>(env, r, b, uim):
 * convert each f32 element of b to a saturated 32-bit integer after
 * scaling by 2^uim.
 *
 * The work is done on a local copy of env->vec_status switched to
 * round-to-zero, so the conversion does not modify the float status held
 * in env.  NaN inputs produce 0.  The scaled value goes through float64
 * and int64 before "satcvt" clamps it to the element range; if any
 * element saturated, the VSCR saturation state is set.
 */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
867
868 target_ulong helper_vclzlsbb(ppc_avr_t *r)
869 {
870 target_ulong count = 0;
871 int i;
872 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
873 if (r->VsrB(i) & 0x01) {
874 break;
875 }
876 count++;
877 }
878 return count;
879 }
880
881 target_ulong helper_vctzlsbb(ppc_avr_t *r)
882 {
883 target_ulong count = 0;
884 int i;
885 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
886 if (r->VsrB(i) & 0x01) {
887 break;
888 }
889 count++;
890 }
891 return count;
892 }
893
894 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
896 {
897 int sat = 0;
898 int i;
899
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 int32_t prod = a->s16[i] * b->s16[i];
902 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
903
904 r->s16[i] = cvtswsh(t, &sat);
905 }
906
907 if (sat) {
908 set_vscr_sat(env);
909 }
910 }
911
912 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
913 ppc_avr_t *b, ppc_avr_t *c)
914 {
915 int sat = 0;
916 int i;
917
918 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
919 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
920 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
921 r->s16[i] = cvtswsh(t, &sat);
922 }
923
924 if (sat) {
925 set_vscr_sat(env);
926 }
927 }
928
929 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
930 {
931 int i;
932
933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
934 int32_t prod = a->s16[i] * b->s16[i];
935 r->s16[i] = (int16_t) (prod + c->s16[i]);
936 }
937 }
938
/*
 * Vector merge.
 *
 * VMRG_DO(name, element, access, ofs) defines helper_v<name>(r, a, b),
 * which interleaves elements of a and b: result element 2*i comes from
 * a[i + ofs] and element 2*i + 1 from b[i + ofs], using the "access"
 * accessor.  The result is built in a temporary so that r may alias a
 * or b.
 *
 * VMRG passes "half" (the local variable holding half the element count)
 * as the offset for the mrgl* form and 0 for the mrgh* form.
 */
#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
960
961 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
963 {
964 int32_t prod[16];
965 int i;
966
967 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
968 prod[i] = (int32_t)a->s8[i] * b->u8[i];
969 }
970
971 VECTOR_FOR_INORDER_I(i, s32) {
972 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
973 prod[4 * i + 2] + prod[4 * i + 3];
974 }
975 }
976
977 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
978 ppc_avr_t *b, ppc_avr_t *c)
979 {
980 int32_t prod[8];
981 int i;
982
983 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
984 prod[i] = a->s16[i] * b->s16[i];
985 }
986
987 VECTOR_FOR_INORDER_I(i, s32) {
988 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
989 }
990 }
991
992 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
993 ppc_avr_t *b, ppc_avr_t *c)
994 {
995 int32_t prod[8];
996 int i;
997 int sat = 0;
998
999 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1000 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1001 }
1002
1003 VECTOR_FOR_INORDER_I(i, s32) {
1004 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1005
1006 r->u32[i] = cvtsdsw(t, &sat);
1007 }
1008
1009 if (sat) {
1010 set_vscr_sat(env);
1011 }
1012 }
1013
1014 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1015 ppc_avr_t *b, ppc_avr_t *c)
1016 {
1017 uint16_t prod[16];
1018 int i;
1019
1020 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1021 prod[i] = a->u8[i] * b->u8[i];
1022 }
1023
1024 VECTOR_FOR_INORDER_I(i, u32) {
1025 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1026 prod[4 * i + 2] + prod[4 * i + 3];
1027 }
1028 }
1029
1030 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1031 ppc_avr_t *b, ppc_avr_t *c)
1032 {
1033 uint32_t prod[8];
1034 int i;
1035
1036 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1037 prod[i] = a->u16[i] * b->u16[i];
1038 }
1039
1040 VECTOR_FOR_INORDER_I(i, u32) {
1041 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1042 }
1043 }
1044
1045 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1046 ppc_avr_t *b, ppc_avr_t *c)
1047 {
1048 uint32_t prod[8];
1049 int i;
1050 int sat = 0;
1051
1052 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1053 prod[i] = a->u16[i] * b->u16[i];
1054 }
1055
1056 VECTOR_FOR_INORDER_I(i, s32) {
1057 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1058
1059 r->u32[i] = cvtuduw(t, &sat);
1060 }
1061
1062 if (sat) {
1063 set_vscr_sat(env);
1064 }
1065 }
1066
/*
 * Vector multiply even/odd.
 *
 * VMUL_DO_EVN defines helper_v<name>(r, a, b), which multiplies the
 * elements at even accessor indices (0, 2, ...) of a and b and stores
 * each double-width product at index i / 2 of the "prod_access" view.
 * VMUL_DO_ODD does the same for the odd indices (1, 3, ...).
 *
 * Both operands are cast to "cast" (the product type) before the
 * multiplication so that the full-width product is computed.
 *
 * VMUL instantiates the mule* / mulo* pair for one element type.
 */
#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL
1101
1102 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1103 ppc_avr_t *c)
1104 {
1105 ppc_avr_t result;
1106 int i;
1107
1108 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1109 int s = c->VsrB(i) & 0x1f;
1110 int index = s & 0xf;
1111
1112 if (s & 0x10) {
1113 result.VsrB(i) = b->VsrB(index);
1114 } else {
1115 result.VsrB(i) = a->VsrB(index);
1116 }
1117 }
1118 *r = result;
1119 }
1120
1121 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1122 ppc_avr_t *c)
1123 {
1124 ppc_avr_t result;
1125 int i;
1126
1127 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1128 int s = c->VsrB(i) & 0x1f;
1129 int index = 15 - (s & 0xf);
1130
1131 if (s & 0x10) {
1132 result.VsrB(i) = a->VsrB(index);
1133 } else {
1134 result.VsrB(i) = b->VsrB(index);
1135 }
1136 }
1137 *r = result;
1138 }
1139
/*
 * Host-endianness helpers for the bit-permute instructions below:
 *   VBPERMQ_INDEX(avr, i)     - index byte i of avr in guest order
 *   VBPERMD_INDEX(i)          - host u64 slot for guest doubleword i
 *   VBPERMQ_DW(index)         - host u64 slot holding guest bit "index"
 *   EXTRACT_BIT(avr, i, index) - guest bit "index" of guest doubleword i
 *
 * NOTE(review): the little-endian VBPERMD_INDEX and EXTRACT_BIT use their
 * "i" / "index" arguments without parentheses, so they are only safe with
 * simple identifiers as arguments (which is how they are used below).
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif
1152
/*
 * Vector bit permute doubleword.
 *
 * For each doubleword i and each j in 0..7, an index byte is read from b
 * via VBPERMQ_INDEX(b, i * 8 + j).  If it is below 64 and the bit it
 * selects in doubleword i of a (via EXTRACT_BIT) is set, bit (0x80 >> j)
 * is set in the corresponding result doubleword.  Index values of 64 or
 * more contribute a 0 bit.
 *
 * The result is accumulated in a zero-initialised temporary so that r
 * may alias a or b.
 */
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}
1167
1168 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1169 {
1170 int i;
1171 uint64_t perm = 0;
1172
1173 VECTOR_FOR_INORDER_I(i, u8) {
1174 int index = VBPERMQ_INDEX(b, i);
1175
1176 if (index < 128) {
1177 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1178 if (a->u64[VBPERMQ_DW(index)] & mask) {
1179 perm |= (0x8000 >> i);
1180 }
1181 }
1182 }
1183
1184 r->VsrD(0) = perm;
1185 r->VsrD(1) = 0;
1186 }
1187
1188 #undef VBPERMQ_INDEX
1189 #undef VBPERMQ_DW
1190
/*
 * PMSUM(name, srcfld, trgfld, trgtyp) defines helper_<name>(r, a, b).
 *
 * First pass: for every source element, XOR together copies of the b
 * element shifted left by j for each set bit j of the a element (a
 * carry-less multiplication), held in the double-width type "trgtyp".
 * Second pass: each target element is the XOR of the two adjacent
 * products.
 *
 * All products are computed before any result is stored, so r may alias
 * a or b.
 */
#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull << j)) {                 \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
1214
1215 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1216 {
1217
1218 #ifdef CONFIG_INT128
1219 int i, j;
1220 __uint128_t prod[2];
1221
1222 VECTOR_FOR_INORDER_I(i, u64) {
1223 prod[i] = 0;
1224 for (j = 0; j < 64; j++) {
1225 if (a->u64[i] & (1ull << j)) {
1226 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1227 }
1228 }
1229 }
1230
1231 r->u128 = prod[0] ^ prod[1];
1232
1233 #else
1234 int i, j;
1235 ppc_avr_t prod[2];
1236
1237 VECTOR_FOR_INORDER_I(i, u64) {
1238 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1239 for (j = 0; j < 64; j++) {
1240 if (a->u64[i] & (1ull << j)) {
1241 ppc_avr_t bshift;
1242 if (j == 0) {
1243 bshift.VsrD(0) = 0;
1244 bshift.VsrD(1) = b->u64[i];
1245 } else {
1246 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1247 bshift.VsrD(1) = b->u64[i] << j;
1248 }
1249 prod[i].VsrD(1) ^= bshift.VsrD(1);
1250 prod[i].VsrD(0) ^= bshift.VsrD(0);
1251 }
1252 }
1253 }
1254
1255 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1256 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1257 #endif
1258 }
1259
1260
1261 #if defined(HOST_WORDS_BIGENDIAN)
1262 #define PKBIG 1
1263 #else
1264 #define PKBIG 0
1265 #endif
1266 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1267 {
1268 int i, j;
1269 ppc_avr_t result;
1270 #if defined(HOST_WORDS_BIGENDIAN)
1271 const ppc_avr_t *x[2] = { a, b };
1272 #else
1273 const ppc_avr_t *x[2] = { b, a };
1274 #endif
1275
1276 VECTOR_FOR_INORDER_I(i, u64) {
1277 VECTOR_FOR_INORDER_I(j, u32) {
1278 uint32_t e = x[i]->u32[j];
1279
1280 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1281 ((e >> 6) & 0x3e0) |
1282 ((e >> 3) & 0x1f));
1283 }
1284 }
1285 *r = result;
1286 }
1287
1288 #define VPK(suffix, from, to, cvt, dosat) \
1289 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1290 ppc_avr_t *a, ppc_avr_t *b) \
1291 { \
1292 int i; \
1293 int sat = 0; \
1294 ppc_avr_t result; \
1295 ppc_avr_t *a0 = PKBIG ? a : b; \
1296 ppc_avr_t *a1 = PKBIG ? b : a; \
1297 \
1298 VECTOR_FOR_INORDER_I(i, from) { \
1299 result.to[i] = cvt(a0->from[i], &sat); \
1300 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1301 } \
1302 *r = result; \
1303 if (dosat && sat) { \
1304 set_vscr_sat(env); \
1305 } \
1306 }
1307 #define I(x, y) (x)
1308 VPK(shss, s16, s8, cvtshsb, 1)
1309 VPK(shus, s16, u8, cvtshub, 1)
1310 VPK(swss, s32, s16, cvtswsh, 1)
1311 VPK(swus, s32, u16, cvtswuh, 1)
1312 VPK(sdss, s64, s32, cvtsdsw, 1)
1313 VPK(sdus, s64, u32, cvtsduw, 1)
1314 VPK(uhus, u16, u8, cvtuhub, 1)
1315 VPK(uwus, u32, u16, cvtuwuh, 1)
1316 VPK(udus, u64, u32, cvtuduw, 1)
1317 VPK(uhum, u16, u8, I, 0)
1318 VPK(uwum, u32, u16, I, 0)
1319 VPK(udum, u64, u32, I, 0)
1320 #undef I
1321 #undef VPK
1322 #undef PKBIG
1323
1324 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1325 {
1326 int i;
1327
1328 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1329 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1330 }
1331 }
1332
1333 #define VRFI(suffix, rounding) \
1334 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1335 ppc_avr_t *b) \
1336 { \
1337 int i; \
1338 float_status s = env->vec_status; \
1339 \
1340 set_float_rounding_mode(rounding, &s); \
1341 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1342 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1343 } \
1344 }
1345 VRFI(n, float_round_nearest_even)
1346 VRFI(m, float_round_down)
1347 VRFI(p, float_round_up)
1348 VRFI(z, float_round_to_zero)
1349 #undef VRFI
1350
1351 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1352 {
1353 int i;
1354
1355 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1356 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1357
1358 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1359 }
1360 }
1361
1362 #define VRLMI(name, size, element, insert) \
1363 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1364 { \
1365 int i; \
1366 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1367 uint##size##_t src1 = a->element[i]; \
1368 uint##size##_t src2 = b->element[i]; \
1369 uint##size##_t src3 = r->element[i]; \
1370 uint##size##_t begin, end, shift, mask, rot_val; \
1371 \
1372 shift = extract##size(src2, 0, 6); \
1373 end = extract##size(src2, 8, 6); \
1374 begin = extract##size(src2, 16, 6); \
1375 rot_val = rol##size(src1, shift); \
1376 mask = mask_u##size(begin, end); \
1377 if (insert) { \
1378 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1379 } else { \
1380 r->element[i] = (rot_val & mask); \
1381 } \
1382 } \
1383 }
1384
1385 VRLMI(vrldmi, 64, u64, 1);
1386 VRLMI(vrlwmi, 32, u32, 1);
1387 VRLMI(vrldnm, 64, u64, 0);
1388 VRLMI(vrlwnm, 32, u32, 0);
1389
1390 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1391 ppc_avr_t *c)
1392 {
1393 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1394 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1395 }
1396
1397 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1398 {
1399 int i;
1400
1401 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1402 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1403 }
1404 }
1405
1406 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1407 {
1408 int i;
1409
1410 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1411 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1412 }
1413 }
1414
1415 #if defined(HOST_WORDS_BIGENDIAN)
1416 #define VEXTU_X_DO(name, size, left) \
1417 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1418 { \
1419 int index; \
1420 if (left) { \
1421 index = (a & 0xf) * 8; \
1422 } else { \
1423 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1424 } \
1425 return int128_getlo(int128_rshift(b->s128, index)) & \
1426 MAKE_64BIT_MASK(0, size); \
1427 }
1428 #else
1429 #define VEXTU_X_DO(name, size, left) \
1430 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1431 { \
1432 int index; \
1433 if (left) { \
1434 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1435 } else { \
1436 index = (a & 0xf) * 8; \
1437 } \
1438 return int128_getlo(int128_rshift(b->s128, index)) & \
1439 MAKE_64BIT_MASK(0, size); \
1440 }
1441 #endif
1442
1443 VEXTU_X_DO(vextublx, 8, 1)
1444 VEXTU_X_DO(vextuhlx, 16, 1)
1445 VEXTU_X_DO(vextuwlx, 32, 1)
1446 VEXTU_X_DO(vextubrx, 8, 0)
1447 VEXTU_X_DO(vextuhrx, 16, 0)
1448 VEXTU_X_DO(vextuwrx, 32, 0)
1449 #undef VEXTU_X_DO
1450
1451 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1452 {
1453 int i;
1454 unsigned int shift, bytes, size;
1455
1456 size = ARRAY_SIZE(r->u8);
1457 for (i = 0; i < size; i++) {
1458 shift = b->VsrB(i) & 0x7; /* extract shift value */
1459 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1460 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1461 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1462 }
1463 }
1464
1465 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1466 {
1467 int i;
1468 unsigned int shift, bytes;
1469
1470 /*
1471 * Use reverse order, as destination and source register can be
1472 * same. Its being modified in place saving temporary, reverse
1473 * order will guarantee that computed result is not fed back.
1474 */
1475 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1476 shift = b->VsrB(i) & 0x7; /* extract shift value */
1477 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1478 /* extract adjacent bytes */
1479 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1480 }
1481 }
1482
1483 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1484 {
1485 int sh = shift & 0xf;
1486 int i;
1487 ppc_avr_t result;
1488
1489 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1490 int index = sh + i;
1491 if (index > 0xf) {
1492 result.VsrB(i) = b->VsrB(index - 0x10);
1493 } else {
1494 result.VsrB(i) = a->VsrB(index);
1495 }
1496 }
1497 *r = result;
1498 }
1499
1500 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1501 {
1502 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1503
1504 #if defined(HOST_WORDS_BIGENDIAN)
1505 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1506 memset(&r->u8[16 - sh], 0, sh);
1507 #else
1508 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1509 memset(&r->u8[0], 0, sh);
1510 #endif
1511 }
1512
1513 #if defined(HOST_WORDS_BIGENDIAN)
1514 #define VINSERT(suffix, element) \
1515 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1516 { \
1517 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1518 sizeof(r->element[0])); \
1519 }
1520 #else
1521 #define VINSERT(suffix, element) \
1522 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1523 { \
1524 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1525 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1526 }
1527 #endif
1528 VINSERT(b, u8)
1529 VINSERT(h, u16)
1530 VINSERT(w, u32)
1531 VINSERT(d, u64)
1532 #undef VINSERT
1533 #if defined(HOST_WORDS_BIGENDIAN)
1534 #define VEXTRACT(suffix, element) \
1535 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1536 { \
1537 uint32_t es = sizeof(r->element[0]); \
1538 memmove(&r->u8[8 - es], &b->u8[index], es); \
1539 memset(&r->u8[8], 0, 8); \
1540 memset(&r->u8[0], 0, 8 - es); \
1541 }
1542 #else
1543 #define VEXTRACT(suffix, element) \
1544 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1545 { \
1546 uint32_t es = sizeof(r->element[0]); \
1547 uint32_t s = (16 - index) - es; \
1548 memmove(&r->u8[8], &b->u8[s], es); \
1549 memset(&r->u8[0], 0, 8); \
1550 memset(&r->u8[8 + es], 0, 8 - es); \
1551 }
1552 #endif
1553 VEXTRACT(ub, u8)
1554 VEXTRACT(uh, u16)
1555 VEXTRACT(uw, u32)
1556 VEXTRACT(d, u64)
1557 #undef VEXTRACT
1558
1559 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1560 ppc_vsr_t *xb, uint32_t index)
1561 {
1562 ppc_vsr_t t = { };
1563 size_t es = sizeof(uint32_t);
1564 uint32_t ext_index;
1565 int i;
1566
1567 ext_index = index;
1568 for (i = 0; i < es; i++, ext_index++) {
1569 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1570 }
1571
1572 *xt = t;
1573 }
1574
1575 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1576 ppc_vsr_t *xb, uint32_t index)
1577 {
1578 ppc_vsr_t t = *xt;
1579 size_t es = sizeof(uint32_t);
1580 int ins_index, i = 0;
1581
1582 ins_index = index;
1583 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1584 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1585 }
1586
1587 *xt = t;
1588 }
1589
1590 #define VEXT_SIGNED(name, element, cast) \
1591 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1592 { \
1593 int i; \
1594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1595 r->element[i] = (cast)b->element[i]; \
1596 } \
1597 }
1598 VEXT_SIGNED(vextsb2w, s32, int8_t)
1599 VEXT_SIGNED(vextsb2d, s64, int8_t)
1600 VEXT_SIGNED(vextsh2w, s32, int16_t)
1601 VEXT_SIGNED(vextsh2d, s64, int16_t)
1602 VEXT_SIGNED(vextsw2d, s64, int32_t)
1603 #undef VEXT_SIGNED
1604
1605 #define VNEG(name, element) \
1606 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1607 { \
1608 int i; \
1609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1610 r->element[i] = -b->element[i]; \
1611 } \
1612 }
1613 VNEG(vnegw, s32)
1614 VNEG(vnegd, s64)
1615 #undef VNEG
1616
1617 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1618 {
1619 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1620
1621 #if defined(HOST_WORDS_BIGENDIAN)
1622 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1623 memset(&r->u8[0], 0, sh);
1624 #else
1625 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1626 memset(&r->u8[16 - sh], 0, sh);
1627 #endif
1628 }
1629
1630 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1631 {
1632 int i;
1633
1634 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1635 r->u32[i] = a->u32[i] >= b->u32[i];
1636 }
1637 }
1638
1639 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1640 {
1641 int64_t t;
1642 int i, upper;
1643 ppc_avr_t result;
1644 int sat = 0;
1645
1646 upper = ARRAY_SIZE(r->s32) - 1;
1647 t = (int64_t)b->VsrSW(upper);
1648 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1649 t += a->VsrSW(i);
1650 result.VsrSW(i) = 0;
1651 }
1652 result.VsrSW(upper) = cvtsdsw(t, &sat);
1653 *r = result;
1654
1655 if (sat) {
1656 set_vscr_sat(env);
1657 }
1658 }
1659
1660 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1661 {
1662 int i, j, upper;
1663 ppc_avr_t result;
1664 int sat = 0;
1665
1666 upper = 1;
1667 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1668 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1669
1670 result.VsrD(i) = 0;
1671 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1672 t += a->VsrSW(2 * i + j);
1673 }
1674 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1675 }
1676
1677 *r = result;
1678 if (sat) {
1679 set_vscr_sat(env);
1680 }
1681 }
1682
1683 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1684 {
1685 int i, j;
1686 int sat = 0;
1687
1688 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1689 int64_t t = (int64_t)b->s32[i];
1690
1691 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1692 t += a->s8[4 * i + j];
1693 }
1694 r->s32[i] = cvtsdsw(t, &sat);
1695 }
1696
1697 if (sat) {
1698 set_vscr_sat(env);
1699 }
1700 }
1701
1702 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1703 {
1704 int sat = 0;
1705 int i;
1706
1707 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1708 int64_t t = (int64_t)b->s32[i];
1709
1710 t += a->s16[2 * i] + a->s16[2 * i + 1];
1711 r->s32[i] = cvtsdsw(t, &sat);
1712 }
1713
1714 if (sat) {
1715 set_vscr_sat(env);
1716 }
1717 }
1718
1719 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1720 {
1721 int i, j;
1722 int sat = 0;
1723
1724 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1725 uint64_t t = (uint64_t)b->u32[i];
1726
1727 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1728 t += a->u8[4 * i + j];
1729 }
1730 r->u32[i] = cvtuduw(t, &sat);
1731 }
1732
1733 if (sat) {
1734 set_vscr_sat(env);
1735 }
1736 }
1737
1738 #if defined(HOST_WORDS_BIGENDIAN)
1739 #define UPKHI 1
1740 #define UPKLO 0
1741 #else
1742 #define UPKHI 0
1743 #define UPKLO 1
1744 #endif
1745 #define VUPKPX(suffix, hi) \
1746 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1747 { \
1748 int i; \
1749 ppc_avr_t result; \
1750 \
1751 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1752 uint16_t e = b->u16[hi ? i : i + 4]; \
1753 uint8_t a = (e >> 15) ? 0xff : 0; \
1754 uint8_t r = (e >> 10) & 0x1f; \
1755 uint8_t g = (e >> 5) & 0x1f; \
1756 uint8_t b = e & 0x1f; \
1757 \
1758 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1759 } \
1760 *r = result; \
1761 }
1762 VUPKPX(lpx, UPKLO)
1763 VUPKPX(hpx, UPKHI)
1764 #undef VUPKPX
1765
1766 #define VUPK(suffix, unpacked, packee, hi) \
1767 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1768 { \
1769 int i; \
1770 ppc_avr_t result; \
1771 \
1772 if (hi) { \
1773 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1774 result.unpacked[i] = b->packee[i]; \
1775 } \
1776 } else { \
1777 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1778 i++) { \
1779 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1780 } \
1781 } \
1782 *r = result; \
1783 }
1784 VUPK(hsb, s16, s8, UPKHI)
1785 VUPK(hsh, s32, s16, UPKHI)
1786 VUPK(hsw, s64, s32, UPKHI)
1787 VUPK(lsb, s16, s8, UPKLO)
1788 VUPK(lsh, s32, s16, UPKLO)
1789 VUPK(lsw, s64, s32, UPKLO)
1790 #undef VUPK
1791 #undef UPKHI
1792 #undef UPKLO
1793
1794 #define VGENERIC_DO(name, element) \
1795 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1796 { \
1797 int i; \
1798 \
1799 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1800 r->element[i] = name(b->element[i]); \
1801 } \
1802 }
1803
1804 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1805 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1806
1807 VGENERIC_DO(clzb, u8)
1808 VGENERIC_DO(clzh, u16)
1809
1810 #undef clzb
1811 #undef clzh
1812
1813 #define ctzb(v) ((v) ? ctz32(v) : 8)
1814 #define ctzh(v) ((v) ? ctz32(v) : 16)
1815 #define ctzw(v) ctz32((v))
1816 #define ctzd(v) ctz64((v))
1817
1818 VGENERIC_DO(ctzb, u8)
1819 VGENERIC_DO(ctzh, u16)
1820 VGENERIC_DO(ctzw, u32)
1821 VGENERIC_DO(ctzd, u64)
1822
1823 #undef ctzb
1824 #undef ctzh
1825 #undef ctzw
1826 #undef ctzd
1827
1828 #define popcntb(v) ctpop8(v)
1829 #define popcnth(v) ctpop16(v)
1830 #define popcntw(v) ctpop32(v)
1831 #define popcntd(v) ctpop64(v)
1832
1833 VGENERIC_DO(popcntb, u8)
1834 VGENERIC_DO(popcnth, u16)
1835 VGENERIC_DO(popcntw, u32)
1836 VGENERIC_DO(popcntd, u64)
1837
1838 #undef popcntb
1839 #undef popcnth
1840 #undef popcntw
1841 #undef popcntd
1842
1843 #undef VGENERIC_DO
1844
1845 #if defined(HOST_WORDS_BIGENDIAN)
1846 #define QW_ONE { .u64 = { 0, 1 } }
1847 #else
1848 #define QW_ONE { .u64 = { 1, 0 } }
1849 #endif
1850
1851 #ifndef CONFIG_INT128
1852
1853 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1854 {
1855 t->u64[0] = ~a.u64[0];
1856 t->u64[1] = ~a.u64[1];
1857 }
1858
1859 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1860 {
1861 if (a.VsrD(0) < b.VsrD(0)) {
1862 return -1;
1863 } else if (a.VsrD(0) > b.VsrD(0)) {
1864 return 1;
1865 } else if (a.VsrD(1) < b.VsrD(1)) {
1866 return -1;
1867 } else if (a.VsrD(1) > b.VsrD(1)) {
1868 return 1;
1869 } else {
1870 return 0;
1871 }
1872 }
1873
1874 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1875 {
1876 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1877 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1878 (~a.VsrD(1) < b.VsrD(1));
1879 }
1880
1881 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1882 {
1883 ppc_avr_t not_a;
1884 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1885 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1886 (~a.VsrD(1) < b.VsrD(1));
1887 avr_qw_not(&not_a, a);
1888 return avr_qw_cmpu(not_a, b) < 0;
1889 }
1890
1891 #endif
1892
1893 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1894 {
1895 #ifdef CONFIG_INT128
1896 r->u128 = a->u128 + b->u128;
1897 #else
1898 avr_qw_add(r, *a, *b);
1899 #endif
1900 }
1901
1902 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1903 {
1904 #ifdef CONFIG_INT128
1905 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1906 #else
1907
1908 if (c->VsrD(1) & 1) {
1909 ppc_avr_t tmp;
1910
1911 tmp.VsrD(0) = 0;
1912 tmp.VsrD(1) = c->VsrD(1) & 1;
1913 avr_qw_add(&tmp, *a, tmp);
1914 avr_qw_add(r, tmp, *b);
1915 } else {
1916 avr_qw_add(r, *a, *b);
1917 }
1918 #endif
1919 }
1920
1921 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1922 {
1923 #ifdef CONFIG_INT128
1924 r->u128 = (~a->u128 < b->u128);
1925 #else
1926 ppc_avr_t not_a;
1927
1928 avr_qw_not(&not_a, *a);
1929
1930 r->VsrD(0) = 0;
1931 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1932 #endif
1933 }
1934
1935 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1936 {
1937 #ifdef CONFIG_INT128
1938 int carry_out = (~a->u128 < b->u128);
1939 if (!carry_out && (c->u128 & 1)) {
1940 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1941 ((a->u128 != 0) || (b->u128 != 0));
1942 }
1943 r->u128 = carry_out;
1944 #else
1945
1946 int carry_in = c->VsrD(1) & 1;
1947 int carry_out = 0;
1948 ppc_avr_t tmp;
1949
1950 carry_out = avr_qw_addc(&tmp, *a, *b);
1951
1952 if (!carry_out && carry_in) {
1953 ppc_avr_t one = QW_ONE;
1954 carry_out = avr_qw_addc(&tmp, tmp, one);
1955 }
1956 r->VsrD(0) = 0;
1957 r->VsrD(1) = carry_out;
1958 #endif
1959 }
1960
1961 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1962 {
1963 #ifdef CONFIG_INT128
1964 r->u128 = a->u128 - b->u128;
1965 #else
1966 ppc_avr_t tmp;
1967 ppc_avr_t one = QW_ONE;
1968
1969 avr_qw_not(&tmp, *b);
1970 avr_qw_add(&tmp, *a, tmp);
1971 avr_qw_add(r, tmp, one);
1972 #endif
1973 }
1974
1975 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1976 {
1977 #ifdef CONFIG_INT128
1978 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1979 #else
1980 ppc_avr_t tmp, sum;
1981
1982 avr_qw_not(&tmp, *b);
1983 avr_qw_add(&sum, *a, tmp);
1984
1985 tmp.VsrD(0) = 0;
1986 tmp.VsrD(1) = c->VsrD(1) & 1;
1987 avr_qw_add(r, sum, tmp);
1988 #endif
1989 }
1990
1991 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1992 {
1993 #ifdef CONFIG_INT128
1994 r->u128 = (~a->u128 < ~b->u128) ||
1995 (a->u128 + ~b->u128 == (__uint128_t)-1);
1996 #else
1997 int carry = (avr_qw_cmpu(*a, *b) > 0);
1998 if (!carry) {
1999 ppc_avr_t tmp;
2000 avr_qw_not(&tmp, *b);
2001 avr_qw_add(&tmp, *a, tmp);
2002 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2003 }
2004 r->VsrD(0) = 0;
2005 r->VsrD(1) = carry;
2006 #endif
2007 }
2008
2009 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2010 {
2011 #ifdef CONFIG_INT128
2012 r->u128 =
2013 (~a->u128 < ~b->u128) ||
2014 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2015 #else
2016 int carry_in = c->VsrD(1) & 1;
2017 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2018 if (!carry_out && carry_in) {
2019 ppc_avr_t tmp;
2020 avr_qw_not(&tmp, *b);
2021 avr_qw_add(&tmp, *a, tmp);
2022 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2023 }
2024
2025 r->VsrD(0) = 0;
2026 r->VsrD(1) = carry_out;
2027 #endif
2028 }
2029
/* Sign nibbles that bcd_get_sgn() treats as positive. */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1 0xA
#define BCD_PLUS_ALT_2 0xE
/* Sign nibbles that bcd_get_sgn() treats as negative. */
#define BCD_NEG_PREF 0xD
#define BCD_NEG_ALT 0xB
/* Sign characters of the national format ('+' and '-'). */
#define NATIONAL_PLUS 0x2B
#define NATIONAL_NEG 0x2D

/* VsrB() index of the byte holding BCD nibble n; nibble 0 is the sign. */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2040
2041 static int bcd_get_sgn(ppc_avr_t *bcd)
2042 {
2043 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2044 case BCD_PLUS_PREF_1:
2045 case BCD_PLUS_PREF_2:
2046 case BCD_PLUS_ALT_1:
2047 case BCD_PLUS_ALT_2:
2048 {
2049 return 1;
2050 }
2051
2052 case BCD_NEG_PREF:
2053 case BCD_NEG_ALT:
2054 {
2055 return -1;
2056 }
2057
2058 default:
2059 {
2060 return 0;
2061 }
2062 }
2063 }
2064
2065 static int bcd_preferred_sgn(int sgn, int ps)
2066 {
2067 if (sgn >= 0) {
2068 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2069 } else {
2070 return BCD_NEG_PREF;
2071 }
2072 }
2073
2074 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2075 {
2076 uint8_t result;
2077 if (n & 1) {
2078 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2079 } else {
2080 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2081 }
2082
2083 if (unlikely(result > 9)) {
2084 *invalid = true;
2085 }
2086 return result;
2087 }
2088
2089 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2090 {
2091 if (n & 1) {
2092 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2093 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2094 } else {
2095 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2096 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2097 }
2098 }
2099
2100 static bool bcd_is_valid(ppc_avr_t *bcd)
2101 {
2102 int i;
2103 int invalid = 0;
2104
2105 if (bcd_get_sgn(bcd) == 0) {
2106 return false;
2107 }
2108
2109 for (i = 1; i < 32; i++) {
2110 bcd_get_digit(bcd, i, &invalid);
2111 if (unlikely(invalid)) {
2112 return false;
2113 }
2114 }
2115 return true;
2116 }
2117
2118 static int bcd_cmp_zero(ppc_avr_t *bcd)
2119 {
2120 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2121 return CRF_EQ;
2122 } else {
2123 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2124 }
2125 }
2126
2127 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2128 {
2129 return reg->VsrH(7 - n);
2130 }
2131
2132 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2133 {
2134 reg->VsrH(7 - n) = val;
2135 }
2136
2137 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2138 {
2139 int i;
2140 int invalid = 0;
2141 for (i = 31; i > 0; i--) {
2142 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2143 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2144 if (unlikely(invalid)) {
2145 return 0; /* doesn't matter */
2146 } else if (dig_a > dig_b) {
2147 return 1;
2148 } else if (dig_a < dig_b) {
2149 return -1;
2150 }
2151 }
2152
2153 return 0;
2154 }
2155
2156 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2157 int *overflow)
2158 {
2159 int carry = 0;
2160 int i;
2161 for (i = 1; i <= 31; i++) {
2162 uint8_t digit = bcd_get_digit(a, i, invalid) +
2163 bcd_get_digit(b, i, invalid) + carry;
2164 if (digit > 9) {
2165 carry = 1;
2166 digit -= 10;
2167 } else {
2168 carry = 0;
2169 }
2170
2171 bcd_put_digit(t, digit, i);
2172 }
2173
2174 *overflow = carry;
2175 }
2176
2177 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2178 int *overflow)
2179 {
2180 int carry = 0;
2181 int i;
2182
2183 for (i = 1; i <= 31; i++) {
2184 uint8_t digit = bcd_get_digit(a, i, invalid) -
2185 bcd_get_digit(b, i, invalid) + carry;
2186 if (digit & 0x80) {
2187 carry = -1;
2188 digit += 10;
2189 } else {
2190 carry = 0;
2191 }
2192
2193 bcd_put_digit(t, digit, i);
2194 }
2195
2196 *overflow = carry;
2197 }
2198
2199 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2200 {
2201
2202 int sgna = bcd_get_sgn(a);
2203 int sgnb = bcd_get_sgn(b);
2204 int invalid = (sgna == 0) || (sgnb == 0);
2205 int overflow = 0;
2206 uint32_t cr = 0;
2207 ppc_avr_t result = { .u64 = { 0, 0 } };
2208
2209 if (!invalid) {
2210 if (sgna == sgnb) {
2211 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2212 bcd_add_mag(&result, a, b, &invalid, &overflow);
2213 cr = bcd_cmp_zero(&result);
2214 } else {
2215 int magnitude = bcd_cmp_mag(a, b);
2216 if (magnitude > 0) {
2217 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2218 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2219 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2220 } else if (magnitude < 0) {
2221 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2222 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2223 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2224 } else {
2225 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2226 cr = CRF_EQ;
2227 }
2228 }
2229 }
2230
2231 if (unlikely(invalid)) {
2232 result.VsrD(0) = result.VsrD(1) = -1;
2233 cr = CRF_SO;
2234 } else if (overflow) {
2235 cr |= CRF_SO;
2236 }
2237
2238 *r = result;
2239
2240 return cr;
2241 }
2242
2243 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2244 {
2245 ppc_avr_t bcopy = *b;
2246 int sgnb = bcd_get_sgn(b);
2247 if (sgnb < 0) {
2248 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2249 } else if (sgnb > 0) {
2250 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2251 }
2252 /* else invalid ... defer to bcdadd code for proper handling */
2253
2254 return helper_bcdadd(r, a, &bcopy, ps);
2255 }
2256
2257 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2258 {
2259 int i;
2260 int cr = 0;
2261 uint16_t national = 0;
2262 uint16_t sgnb = get_national_digit(b, 0);
2263 ppc_avr_t ret = { .u64 = { 0, 0 } };
2264 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2265
2266 for (i = 1; i < 8; i++) {
2267 national = get_national_digit(b, i);
2268 if (unlikely(national < 0x30 || national > 0x39)) {
2269 invalid = 1;
2270 break;
2271 }
2272
2273 bcd_put_digit(&ret, national & 0xf, i);
2274 }
2275
2276 if (sgnb == NATIONAL_PLUS) {
2277 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2278 } else {
2279 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2280 }
2281
2282 cr = bcd_cmp_zero(&ret);
2283
2284 if (unlikely(invalid)) {
2285 cr = CRF_SO;
2286 }
2287
2288 *r = ret;
2289
2290 return cr;
2291 }
2292
2293 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2294 {
2295 int i;
2296 int cr = 0;
2297 int sgnb = bcd_get_sgn(b);
2298 int invalid = (sgnb == 0);
2299 ppc_avr_t ret = { .u64 = { 0, 0 } };
2300
2301 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2302
2303 for (i = 1; i < 8; i++) {
2304 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2305
2306 if (unlikely(invalid)) {
2307 break;
2308 }
2309 }
2310 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2311
2312 cr = bcd_cmp_zero(b);
2313
2314 if (ox_flag) {
2315 cr |= CRF_SO;
2316 }
2317
2318 if (unlikely(invalid)) {
2319 cr = CRF_SO;
2320 }
2321
2322 *r = ret;
2323
2324 return cr;
2325 }
2326
2327 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2328 {
2329 int i;
2330 int cr = 0;
2331 int invalid = 0;
2332 int zone_digit = 0;
2333 int zone_lead = ps ? 0xF : 0x3;
2334 int digit = 0;
2335 ppc_avr_t ret = { .u64 = { 0, 0 } };
2336 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2337
2338 if (unlikely((sgnb < 0xA) && ps)) {
2339 invalid = 1;
2340 }
2341
2342 for (i = 0; i < 16; i++) {
2343 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2344 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2345 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2346 invalid = 1;
2347 break;
2348 }
2349
2350 bcd_put_digit(&ret, digit, i + 1);
2351 }
2352
2353 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2354 (!ps && (sgnb & 0x4))) {
2355 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2356 } else {
2357 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2358 }
2359
2360 cr = bcd_cmp_zero(&ret);
2361
2362 if (unlikely(invalid)) {
2363 cr = CRF_SO;
2364 }
2365
2366 *r = ret;
2367
2368 return cr;
2369 }
2370
2371 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2372 {
2373 int i;
2374 int cr = 0;
2375 uint8_t digit = 0;
2376 int sgnb = bcd_get_sgn(b);
2377 int zone_lead = (ps) ? 0xF0 : 0x30;
2378 int invalid = (sgnb == 0);
2379 ppc_avr_t ret = { .u64 = { 0, 0 } };
2380
2381 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2382
2383 for (i = 0; i < 16; i++) {
2384 digit = bcd_get_digit(b, i + 1, &invalid);
2385
2386 if (unlikely(invalid)) {
2387 break;
2388 }
2389
2390 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2391 }
2392
2393 if (ps) {
2394 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2395 } else {
2396 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2397 }
2398
2399 cr = bcd_cmp_zero(b);
2400
2401 if (ox_flag) {
2402 cr |= CRF_SO;
2403 }
2404
2405 if (unlikely(invalid)) {
2406 cr = CRF_SO;
2407 }
2408
2409 *r = ret;
2410
2411 return cr;
2412 }
2413
2414 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2415 {
2416 int i;
2417 int cr = 0;
2418 uint64_t lo_value;
2419 uint64_t hi_value;
2420 ppc_avr_t ret = { .u64 = { 0, 0 } };
2421
2422 if (b->VsrSD(0) < 0) {
2423 lo_value = -b->VsrSD(1);
2424 hi_value = ~b->VsrD(0) + !lo_value;
2425 bcd_put_digit(&ret, 0xD, 0);
2426 } else {
2427 lo_value = b->VsrD(1);
2428 hi_value = b->VsrD(0);
2429 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2430 }
2431
2432 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2433 lo_value > 9999999999999999ULL) {
2434 cr = CRF_SO;
2435 }
2436
2437 for (i = 1; i < 16; hi_value /= 10, i++) {
2438 bcd_put_digit(&ret, hi_value % 10, i);
2439 }
2440
2441 for (; i < 32; lo_value /= 10, i++) {
2442 bcd_put_digit(&ret, lo_value % 10, i);
2443 }
2444
2445 cr |= bcd_cmp_zero(&ret);
2446
2447 *r = ret;
2448
2449 return cr;
2450 }
2451
2452 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2453 {
2454 uint8_t i;
2455 int cr;
2456 uint64_t carry;
2457 uint64_t unused;
2458 uint64_t lo_value;
2459 uint64_t hi_value = 0;
2460 int sgnb = bcd_get_sgn(b);
2461 int invalid = (sgnb == 0);
2462
2463 lo_value = bcd_get_digit(b, 31, &invalid);
2464 for (i = 30; i > 0; i--) {
2465 mulu64(&lo_value, &carry, lo_value, 10ULL);
2466 mulu64(&hi_value, &unused, hi_value, 10ULL);
2467 lo_value += bcd_get_digit(b, i, &invalid);
2468 hi_value += carry;
2469
2470 if (unlikely(invalid)) {
2471 break;
2472 }
2473 }
2474
2475 if (sgnb == -1) {
2476 r->VsrSD(1) = -lo_value;
2477 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2478 } else {
2479 r->VsrSD(1) = lo_value;
2480 r->VsrSD(0) = hi_value;
2481 }
2482
2483 cr = bcd_cmp_zero(b);
2484
2485 if (unlikely(invalid)) {
2486 cr = CRF_SO;
2487 }
2488
2489 return cr;
2490 }
2491
2492 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2493 {
2494 int i;
2495 int invalid = 0;
2496
2497 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2498 return CRF_SO;
2499 }
2500
2501 *r = *a;
2502 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2503
2504 for (i = 1; i < 32; i++) {
2505 bcd_get_digit(a, i, &invalid);
2506 bcd_get_digit(b, i, &invalid);
2507 if (unlikely(invalid)) {
2508 return CRF_SO;
2509 }
2510 }
2511
2512 return bcd_cmp_zero(r);
2513 }
2514
2515 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2516 {
2517 int sgnb = bcd_get_sgn(b);
2518
2519 *r = *b;
2520 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2521
2522 if (bcd_is_valid(b) == false) {
2523 return CRF_SO;
2524 }
2525
2526 return bcd_cmp_zero(r);
2527 }
2528
2529 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2530 {
2531 int cr;
2532 int i = a->VsrSB(7);
2533 bool ox_flag = false;
2534 int sgnb = bcd_get_sgn(b);
2535 ppc_avr_t ret = *b;
2536 ret.VsrD(1) &= ~0xf;
2537
2538 if (bcd_is_valid(b) == false) {
2539 return CRF_SO;
2540 }
2541
2542 if (unlikely(i > 31)) {
2543 i = 31;
2544 } else if (unlikely(i < -31)) {
2545 i = -31;
2546 }
2547
2548 if (i > 0) {
2549 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2550 } else {
2551 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2552 }
2553 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2554
2555 *r = ret;
2556
2557 cr = bcd_cmp_zero(r);
2558 if (ox_flag) {
2559 cr |= CRF_SO;
2560 }
2561
2562 return cr;
2563 }
2564
2565 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2566 {
2567 int cr;
2568 int i;
2569 int invalid = 0;
2570 bool ox_flag = false;
2571 ppc_avr_t ret = *b;
2572
2573 for (i = 0; i < 32; i++) {
2574 bcd_get_digit(b, i, &invalid);
2575
2576 if (unlikely(invalid)) {
2577 return CRF_SO;
2578 }
2579 }
2580
2581 i = a->VsrSB(7);
2582 if (i >= 32) {
2583 ox_flag = true;
2584 ret.VsrD(1) = ret.VsrD(0) = 0;
2585 } else if (i <= -32) {
2586 ret.VsrD(1) = ret.VsrD(0) = 0;
2587 } else if (i > 0) {
2588 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2589 } else {
2590 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2591 }
2592 *r = ret;
2593
2594 cr = bcd_cmp_zero(r);
2595 if (ox_flag) {
2596 cr |= CRF_SO;
2597 }
2598
2599 return cr;
2600 }
2601
2602 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2603 {
2604 int cr;
2605 int unused = 0;
2606 int invalid = 0;
2607 bool ox_flag = false;
2608 int sgnb = bcd_get_sgn(b);
2609 ppc_avr_t ret = *b;
2610 ret.VsrD(1) &= ~0xf;
2611
2612 int i = a->VsrSB(7);
2613 ppc_avr_t bcd_one;
2614
2615 bcd_one.VsrD(0) = 0;
2616 bcd_one.VsrD(1) = 0x10;
2617
2618 if (bcd_is_valid(b) == false) {
2619 return CRF_SO;
2620 }
2621
2622 if (unlikely(i > 31)) {
2623 i = 31;
2624 } else if (unlikely(i < -31)) {
2625 i = -31;
2626 }
2627
2628 if (i > 0) {
2629 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2630 } else {
2631 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2632
2633 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2634 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2635 }
2636 }
2637 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2638
2639 cr = bcd_cmp_zero(&ret);
2640 if (ox_flag) {
2641 cr |= CRF_SO;
2642 }
2643 *r = ret;
2644
2645 return cr;
2646 }
2647
2648 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2649 {
2650 uint64_t mask;
2651 uint32_t ox_flag = 0;
2652 int i = a->VsrSH(3) + 1;
2653 ppc_avr_t ret = *b;
2654
2655 if (bcd_is_valid(b) == false) {
2656 return CRF_SO;
2657 }
2658
2659 if (i > 16 && i < 32) {
2660 mask = (uint64_t)-1 >> (128 - i * 4);
2661 if (ret.VsrD(0) & ~mask) {
2662 ox_flag = CRF_SO;
2663 }
2664
2665 ret.VsrD(0) &= mask;
2666 } else if (i >= 0 && i <= 16) {
2667 mask = (uint64_t)-1 >> (64 - i * 4);
2668 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2669 ox_flag = CRF_SO;
2670 }
2671
2672 ret.VsrD(1) &= mask;
2673 ret.VsrD(0) = 0;
2674 }
2675 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2676 *r = ret;
2677
2678 return bcd_cmp_zero(&ret) | ox_flag;
2679 }
2680
2681 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2682 {
2683 int i;
2684 uint64_t mask;
2685 uint32_t ox_flag = 0;
2686 int invalid = 0;
2687 ppc_avr_t ret = *b;
2688
2689 for (i = 0; i < 32; i++) {
2690 bcd_get_digit(b, i, &invalid);
2691
2692 if (unlikely(invalid)) {
2693 return CRF_SO;
2694 }
2695 }
2696
2697 i = a->VsrSH(3);
2698 if (i > 16 && i < 33) {
2699 mask = (uint64_t)-1 >> (128 - i * 4);
2700 if (ret.VsrD(0) & ~mask) {
2701 ox_flag = CRF_SO;
2702 }
2703
2704 ret.VsrD(0) &= mask;
2705 } else if (i > 0 && i <= 16) {
2706 mask = (uint64_t)-1 >> (64 - i * 4);
2707 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2708 ox_flag = CRF_SO;
2709 }
2710
2711 ret.VsrD(1) &= mask;
2712 ret.VsrD(0) = 0;
2713 } else if (i == 0) {
2714 if (ret.VsrD(0) || ret.VsrD(1)) {
2715 ox_flag = CRF_SO;
2716 }
2717 ret.VsrD(0) = ret.VsrD(1) = 0;
2718 }
2719
2720 *r = ret;
2721 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2722 return ox_flag | CRF_EQ;
2723 }
2724
2725 return ox_flag | CRF_GT;
2726 }
2727
2728 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2729 {
2730 int i;
2731 VECTOR_FOR_INORDER_I(i, u8) {
2732 r->u8[i] = AES_sbox[a->u8[i]];
2733 }
2734 }
2735
2736 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2737 {
2738 ppc_avr_t result;
2739 int i;
2740
2741 VECTOR_FOR_INORDER_I(i, u32) {
2742 result.VsrW(i) = b->VsrW(i) ^
2743 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2744 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2745 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2746 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2747 }
2748 *r = result;
2749 }
2750
2751 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2752 {
2753 ppc_avr_t result;
2754 int i;
2755
2756 VECTOR_FOR_INORDER_I(i, u8) {
2757 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2758 }
2759 *r = result;
2760 }
2761
2762 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2763 {
2764 /* This differs from what is written in ISA V2.07. The RTL is */
2765 /* incorrect and will be fixed in V2.07B. */
2766 int i;
2767 ppc_avr_t tmp;
2768
2769 VECTOR_FOR_INORDER_I(i, u8) {
2770 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2771 }
2772
2773 VECTOR_FOR_INORDER_I(i, u32) {
2774 r->VsrW(i) =
2775 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2776 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2777 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2778 AES_imc[tmp.VsrB(4 * i + 3)][3];
2779 }
2780 }
2781
2782 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2783 {
2784 ppc_avr_t result;
2785 int i;
2786
2787 VECTOR_FOR_INORDER_I(i, u8) {
2788 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2789 }
2790 *r = result;
2791 }
2792
2793 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2794 {
2795 int st = (st_six & 0x10) != 0;
2796 int six = st_six & 0xF;
2797 int i;
2798
2799 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2800 if (st == 0) {
2801 if ((six & (0x8 >> i)) == 0) {
2802 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2803 ror32(a->VsrW(i), 18) ^
2804 (a->VsrW(i) >> 3);
2805 } else { /* six.bit[i] == 1 */
2806 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2807 ror32(a->VsrW(i), 19) ^
2808 (a->VsrW(i) >> 10);
2809 }
2810 } else { /* st == 1 */
2811 if ((six & (0x8 >> i)) == 0) {
2812 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2813 ror32(a->VsrW(i), 13) ^
2814 ror32(a->VsrW(i), 22);
2815 } else { /* six.bit[i] == 1 */
2816 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2817 ror32(a->VsrW(i), 11) ^
2818 ror32(a->VsrW(i), 25);
2819 }
2820 }
2821 }
2822 }
2823
2824 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2825 {
2826 int st = (st_six & 0x10) != 0;
2827 int six = st_six & 0xF;
2828 int i;
2829
2830 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2831 if (st == 0) {
2832 if ((six & (0x8 >> (2 * i))) == 0) {
2833 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2834 ror64(a->VsrD(i), 8) ^
2835 (a->VsrD(i) >> 7);
2836 } else { /* six.bit[2*i] == 1 */
2837 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2838 ror64(a->VsrD(i), 61) ^
2839 (a->VsrD(i) >> 6);
2840 }
2841 } else { /* st == 1 */
2842 if ((six & (0x8 >> (2 * i))) == 0) {
2843 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2844 ror64(a->VsrD(i), 34) ^
2845 ror64(a->VsrD(i), 39);
2846 } else { /* six.bit[2*i] == 1 */
2847 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2848 ror64(a->VsrD(i), 18) ^
2849 ror64(a->VsrD(i), 41);
2850 }
2851 }
2852 }
2853 }
2854
2855 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2856 {
2857 ppc_avr_t result;
2858 int i;
2859
2860 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2861 int indexA = c->VsrB(i) >> 4;
2862 int indexB = c->VsrB(i) & 0xF;
2863
2864 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2865 }
2866 *r = result;
2867 }
2868
2869 #undef VECTOR_FOR_INORDER_I
2870
2871 /*****************************************************************************/
2872 /* SPE extension helpers */
2873 /* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: swap the nibbles, reversing each one. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the bytes swap places and each
 * byte is bit-reversed.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    /*
     * Widen each reversed byte to uint32_t before shifting.  A uint8_t
     * operand is promoted to int, and shifting a value of 0x80 or more
     * left by 24 would move a one into the sign bit, which is undefined.
     */
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2889
2890 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2891 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2892 {
2893 uint32_t a, b, d, mask;
2894
2895 mask = UINT32_MAX >> (32 - MASKBITS);
2896 a = arg1 & mask;
2897 b = arg2 & mask;
2898 d = word_reverse(1 + word_reverse(a | ~b));
2899 return (arg1 & ~mask) | (d & b);
2900 }
2901
/*
 * Count leading sign bits: when the top bit of @val is set the value is
 * inverted first, so that clz32() counts the run of leading ones;
 * otherwise it counts the run of leading zeros directly.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    return clz32((val & 0x80000000) ? ~val : val);
}
2910
/* Count leading zeros of a 32-bit value; thin wrapper around clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
2915
2916 /* 440 specific */
2917 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2918 target_ulong low, uint32_t update_Rc)
2919 {
2920 target_ulong mask;
2921 int i;
2922
2923 i = 1;
2924 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2925 if ((high & mask) == 0) {
2926 if (update_Rc) {
2927 env->crf[0] = 0x4;
2928 }
2929 goto done;
2930 }
2931 i++;
2932 }
2933 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2934 if ((low & mask) == 0) {
2935 if (update_Rc) {
2936 env->crf[0] = 0x8;
2937 }
2938 goto done;
2939 }
2940 i++;
2941 }
2942 i = 8;
2943 if (update_Rc) {
2944 env->crf[0] = 0x2;
2945 }
2946 done:
2947 env->xer = (env->xer & ~0x7F) | i;
2948 if (update_Rc) {
2949 env->crf[0] |= xer_so;
2950 }
2951 return i;
2952 }