Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging
[qemu.git] / accel / tcg / tcg-runtime-gvec.c
1 /*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "tcg/tcg-gvec-desc.h"
25
26
/*
 * Zero the tail of the destination vector: every 64-bit word of d from
 * byte offset oprsz up to (but not including) simd_maxsz(desc).
 * Nothing is written when simd_maxsz(desc) does not exceed oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t total = simd_maxsz(desc);
    intptr_t ofs = oprsz;

    if (unlikely(ofs < total)) {
        /* The guard guarantees at least one word needs clearing. */
        do {
            *(uint64_t *)(d + ofs) = 0;
            ofs += sizeof(uint64_t);
        } while (ofs < total);
    }
}
38
39 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40 {
41 intptr_t oprsz = simd_oprsz(desc);
42 intptr_t i;
43
44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46 }
47 clear_high(d, oprsz, desc);
48 }
49
50 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51 {
52 intptr_t oprsz = simd_oprsz(desc);
53 intptr_t i;
54
55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57 }
58 clear_high(d, oprsz, desc);
59 }
60
61 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62 {
63 intptr_t oprsz = simd_oprsz(desc);
64 intptr_t i;
65
66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68 }
69 clear_high(d, oprsz, desc);
70 }
71
72 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73 {
74 intptr_t oprsz = simd_oprsz(desc);
75 intptr_t i;
76
77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79 }
80 clear_high(d, oprsz, desc);
81 }
82
83 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
84 {
85 intptr_t oprsz = simd_oprsz(desc);
86 intptr_t i;
87
88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
90 }
91 clear_high(d, oprsz, desc);
92 }
93
94 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
95 {
96 intptr_t oprsz = simd_oprsz(desc);
97 intptr_t i;
98
99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
101 }
102 clear_high(d, oprsz, desc);
103 }
104
105 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
106 {
107 intptr_t oprsz = simd_oprsz(desc);
108 intptr_t i;
109
110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
112 }
113 clear_high(d, oprsz, desc);
114 }
115
116 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
117 {
118 intptr_t oprsz = simd_oprsz(desc);
119 intptr_t i;
120
121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
123 }
124 clear_high(d, oprsz, desc);
125 }
126
127 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128 {
129 intptr_t oprsz = simd_oprsz(desc);
130 intptr_t i;
131
132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134 }
135 clear_high(d, oprsz, desc);
136 }
137
138 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139 {
140 intptr_t oprsz = simd_oprsz(desc);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145 }
146 clear_high(d, oprsz, desc);
147 }
148
149 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150 {
151 intptr_t oprsz = simd_oprsz(desc);
152 intptr_t i;
153
154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156 }
157 clear_high(d, oprsz, desc);
158 }
159
160 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161 {
162 intptr_t oprsz = simd_oprsz(desc);
163 intptr_t i;
164
165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167 }
168 clear_high(d, oprsz, desc);
169 }
170
171 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
172 {
173 intptr_t oprsz = simd_oprsz(desc);
174 intptr_t i;
175
176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
178 }
179 clear_high(d, oprsz, desc);
180 }
181
182 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
183 {
184 intptr_t oprsz = simd_oprsz(desc);
185 intptr_t i;
186
187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
189 }
190 clear_high(d, oprsz, desc);
191 }
192
193 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
194 {
195 intptr_t oprsz = simd_oprsz(desc);
196 intptr_t i;
197
198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
200 }
201 clear_high(d, oprsz, desc);
202 }
203
204 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
205 {
206 intptr_t oprsz = simd_oprsz(desc);
207 intptr_t i;
208
209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
211 }
212 clear_high(d, oprsz, desc);
213 }
214
215 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
216 {
217 intptr_t oprsz = simd_oprsz(desc);
218 intptr_t i;
219
220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
222 }
223 clear_high(d, oprsz, desc);
224 }
225
226 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
227 {
228 intptr_t oprsz = simd_oprsz(desc);
229 intptr_t i;
230
231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
233 }
234 clear_high(d, oprsz, desc);
235 }
236
237 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
238 {
239 intptr_t oprsz = simd_oprsz(desc);
240 intptr_t i;
241
242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
244 }
245 clear_high(d, oprsz, desc);
246 }
247
248 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
249 {
250 intptr_t oprsz = simd_oprsz(desc);
251 intptr_t i;
252
253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
255 }
256 clear_high(d, oprsz, desc);
257 }
258
259 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
260 {
261 intptr_t oprsz = simd_oprsz(desc);
262 intptr_t i;
263
264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
266 }
267 clear_high(d, oprsz, desc);
268 }
269
270 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
271 {
272 intptr_t oprsz = simd_oprsz(desc);
273 intptr_t i;
274
275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
277 }
278 clear_high(d, oprsz, desc);
279 }
280
281 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
282 {
283 intptr_t oprsz = simd_oprsz(desc);
284 intptr_t i;
285
286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
288 }
289 clear_high(d, oprsz, desc);
290 }
291
292 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
293 {
294 intptr_t oprsz = simd_oprsz(desc);
295 intptr_t i;
296
297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
299 }
300 clear_high(d, oprsz, desc);
301 }
302
303 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304 {
305 intptr_t oprsz = simd_oprsz(desc);
306 intptr_t i;
307
308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310 }
311 clear_high(d, oprsz, desc);
312 }
313
314 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315 {
316 intptr_t oprsz = simd_oprsz(desc);
317 intptr_t i;
318
319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321 }
322 clear_high(d, oprsz, desc);
323 }
324
325 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326 {
327 intptr_t oprsz = simd_oprsz(desc);
328 intptr_t i;
329
330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332 }
333 clear_high(d, oprsz, desc);
334 }
335
336 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337 {
338 intptr_t oprsz = simd_oprsz(desc);
339 intptr_t i;
340
341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343 }
344 clear_high(d, oprsz, desc);
345 }
346
347 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348 {
349 intptr_t oprsz = simd_oprsz(desc);
350 intptr_t i;
351
352 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353 int8_t aa = *(int8_t *)(a + i);
354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355 }
356 clear_high(d, oprsz, desc);
357 }
358
359 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360 {
361 intptr_t oprsz = simd_oprsz(desc);
362 intptr_t i;
363
364 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365 int16_t aa = *(int16_t *)(a + i);
366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367 }
368 clear_high(d, oprsz, desc);
369 }
370
371 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372 {
373 intptr_t oprsz = simd_oprsz(desc);
374 intptr_t i;
375
376 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377 int32_t aa = *(int32_t *)(a + i);
378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379 }
380 clear_high(d, oprsz, desc);
381 }
382
383 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384 {
385 intptr_t oprsz = simd_oprsz(desc);
386 intptr_t i;
387
388 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389 int64_t aa = *(int64_t *)(a + i);
390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391 }
392 clear_high(d, oprsz, desc);
393 }
394
395 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396 {
397 intptr_t oprsz = simd_oprsz(desc);
398
399 memcpy(d, a, oprsz);
400 clear_high(d, oprsz, desc);
401 }
402
403 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404 {
405 intptr_t oprsz = simd_oprsz(desc);
406 intptr_t i;
407
408 if (c == 0) {
409 oprsz = 0;
410 } else {
411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412 *(uint64_t *)(d + i) = c;
413 }
414 }
415 clear_high(d, oprsz, desc);
416 }
417
418 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419 {
420 intptr_t oprsz = simd_oprsz(desc);
421 intptr_t i;
422
423 if (c == 0) {
424 oprsz = 0;
425 } else {
426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427 *(uint32_t *)(d + i) = c;
428 }
429 }
430 clear_high(d, oprsz, desc);
431 }
432
433 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434 {
435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436 }
437
438 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439 {
440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441 }
442
443 void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444 {
445 intptr_t oprsz = simd_oprsz(desc);
446 intptr_t i;
447
448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450 }
451 clear_high(d, oprsz, desc);
452 }
453
454 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455 {
456 intptr_t oprsz = simd_oprsz(desc);
457 intptr_t i;
458
459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461 }
462 clear_high(d, oprsz, desc);
463 }
464
465 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466 {
467 intptr_t oprsz = simd_oprsz(desc);
468 intptr_t i;
469
470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472 }
473 clear_high(d, oprsz, desc);
474 }
475
476 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477 {
478 intptr_t oprsz = simd_oprsz(desc);
479 intptr_t i;
480
481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483 }
484 clear_high(d, oprsz, desc);
485 }
486
487 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488 {
489 intptr_t oprsz = simd_oprsz(desc);
490 intptr_t i;
491
492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494 }
495 clear_high(d, oprsz, desc);
496 }
497
498 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499 {
500 intptr_t oprsz = simd_oprsz(desc);
501 intptr_t i;
502
503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505 }
506 clear_high(d, oprsz, desc);
507 }
508
509 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510 {
511 intptr_t oprsz = simd_oprsz(desc);
512 intptr_t i;
513
514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516 }
517 clear_high(d, oprsz, desc);
518 }
519
520 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521 {
522 intptr_t oprsz = simd_oprsz(desc);
523 intptr_t i;
524
525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527 }
528 clear_high(d, oprsz, desc);
529 }
530
531 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532 {
533 intptr_t oprsz = simd_oprsz(desc);
534 intptr_t i;
535
536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538 }
539 clear_high(d, oprsz, desc);
540 }
541
542 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
543 {
544 intptr_t oprsz = simd_oprsz(desc);
545 intptr_t i;
546
547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
549 }
550 clear_high(d, oprsz, desc);
551 }
552
553 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
554 {
555 intptr_t oprsz = simd_oprsz(desc);
556 intptr_t i;
557
558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
560 }
561 clear_high(d, oprsz, desc);
562 }
563
564 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
565 {
566 intptr_t oprsz = simd_oprsz(desc);
567 intptr_t i;
568
569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
571 }
572 clear_high(d, oprsz, desc);
573 }
574
575 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
576 {
577 intptr_t oprsz = simd_oprsz(desc);
578 int shift = simd_data(desc);
579 intptr_t i;
580
581 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
582 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
583 }
584 clear_high(d, oprsz, desc);
585 }
586
587 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
588 {
589 intptr_t oprsz = simd_oprsz(desc);
590 int shift = simd_data(desc);
591 intptr_t i;
592
593 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
594 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
595 }
596 clear_high(d, oprsz, desc);
597 }
598
599 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
600 {
601 intptr_t oprsz = simd_oprsz(desc);
602 int shift = simd_data(desc);
603 intptr_t i;
604
605 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
606 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
607 }
608 clear_high(d, oprsz, desc);
609 }
610
611 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
612 {
613 intptr_t oprsz = simd_oprsz(desc);
614 int shift = simd_data(desc);
615 intptr_t i;
616
617 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
618 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
619 }
620 clear_high(d, oprsz, desc);
621 }
622
623 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
624 {
625 intptr_t oprsz = simd_oprsz(desc);
626 int shift = simd_data(desc);
627 intptr_t i;
628
629 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
630 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
631 }
632 clear_high(d, oprsz, desc);
633 }
634
635 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
636 {
637 intptr_t oprsz = simd_oprsz(desc);
638 int shift = simd_data(desc);
639 intptr_t i;
640
641 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
642 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
643 }
644 clear_high(d, oprsz, desc);
645 }
646
647 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
648 {
649 intptr_t oprsz = simd_oprsz(desc);
650 int shift = simd_data(desc);
651 intptr_t i;
652
653 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
654 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
655 }
656 clear_high(d, oprsz, desc);
657 }
658
659 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
660 {
661 intptr_t oprsz = simd_oprsz(desc);
662 int shift = simd_data(desc);
663 intptr_t i;
664
665 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
666 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
667 }
668 clear_high(d, oprsz, desc);
669 }
670
671 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
672 {
673 intptr_t oprsz = simd_oprsz(desc);
674 int shift = simd_data(desc);
675 intptr_t i;
676
677 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
678 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
679 }
680 clear_high(d, oprsz, desc);
681 }
682
683 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
684 {
685 intptr_t oprsz = simd_oprsz(desc);
686 int shift = simd_data(desc);
687 intptr_t i;
688
689 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
690 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
691 }
692 clear_high(d, oprsz, desc);
693 }
694
695 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
696 {
697 intptr_t oprsz = simd_oprsz(desc);
698 int shift = simd_data(desc);
699 intptr_t i;
700
701 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
702 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
703 }
704 clear_high(d, oprsz, desc);
705 }
706
707 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
708 {
709 intptr_t oprsz = simd_oprsz(desc);
710 int shift = simd_data(desc);
711 intptr_t i;
712
713 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
714 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
715 }
716 clear_high(d, oprsz, desc);
717 }
718
719 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
720 {
721 intptr_t oprsz = simd_oprsz(desc);
722 intptr_t i;
723
724 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
725 uint8_t sh = *(uint8_t *)(b + i) & 7;
726 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
727 }
728 clear_high(d, oprsz, desc);
729 }
730
731 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
732 {
733 intptr_t oprsz = simd_oprsz(desc);
734 intptr_t i;
735
736 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
737 uint8_t sh = *(uint16_t *)(b + i) & 15;
738 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
739 }
740 clear_high(d, oprsz, desc);
741 }
742
743 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
744 {
745 intptr_t oprsz = simd_oprsz(desc);
746 intptr_t i;
747
748 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
749 uint8_t sh = *(uint32_t *)(b + i) & 31;
750 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
751 }
752 clear_high(d, oprsz, desc);
753 }
754
755 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
756 {
757 intptr_t oprsz = simd_oprsz(desc);
758 intptr_t i;
759
760 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
761 uint8_t sh = *(uint64_t *)(b + i) & 63;
762 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
763 }
764 clear_high(d, oprsz, desc);
765 }
766
767 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
768 {
769 intptr_t oprsz = simd_oprsz(desc);
770 intptr_t i;
771
772 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
773 uint8_t sh = *(uint8_t *)(b + i) & 7;
774 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
775 }
776 clear_high(d, oprsz, desc);
777 }
778
779 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
780 {
781 intptr_t oprsz = simd_oprsz(desc);
782 intptr_t i;
783
784 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
785 uint8_t sh = *(uint16_t *)(b + i) & 15;
786 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
787 }
788 clear_high(d, oprsz, desc);
789 }
790
791 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
792 {
793 intptr_t oprsz = simd_oprsz(desc);
794 intptr_t i;
795
796 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
797 uint8_t sh = *(uint32_t *)(b + i) & 31;
798 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
799 }
800 clear_high(d, oprsz, desc);
801 }
802
803 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
804 {
805 intptr_t oprsz = simd_oprsz(desc);
806 intptr_t i;
807
808 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
809 uint8_t sh = *(uint64_t *)(b + i) & 63;
810 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
811 }
812 clear_high(d, oprsz, desc);
813 }
814
815 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
816 {
817 intptr_t oprsz = simd_oprsz(desc);
818 intptr_t i;
819
820 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
821 uint8_t sh = *(uint8_t *)(b + i) & 7;
822 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
823 }
824 clear_high(d, oprsz, desc);
825 }
826
827 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
828 {
829 intptr_t oprsz = simd_oprsz(desc);
830 intptr_t i;
831
832 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
833 uint8_t sh = *(uint16_t *)(b + i) & 15;
834 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
835 }
836 clear_high(d, oprsz, desc);
837 }
838
839 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
840 {
841 intptr_t oprsz = simd_oprsz(desc);
842 intptr_t i;
843
844 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
845 uint8_t sh = *(uint32_t *)(b + i) & 31;
846 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
847 }
848 clear_high(d, oprsz, desc);
849 }
850
851 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
852 {
853 intptr_t oprsz = simd_oprsz(desc);
854 intptr_t i;
855
856 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
857 uint8_t sh = *(uint64_t *)(b + i) & 63;
858 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
859 }
860 clear_high(d, oprsz, desc);
861 }
862
863 #define DO_CMP1(NAME, TYPE, OP) \
864 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
865 { \
866 intptr_t oprsz = simd_oprsz(desc); \
867 intptr_t i; \
868 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
869 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
870 } \
871 clear_high(d, oprsz, desc); \
872 }
873
874 #define DO_CMP2(SZ) \
875 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
876 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
877 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
878 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
879 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
880 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
881
882 DO_CMP2(8)
883 DO_CMP2(16)
884 DO_CMP2(32)
885 DO_CMP2(64)
886
887 #undef DO_CMP1
888 #undef DO_CMP2
889
890 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
891 {
892 intptr_t oprsz = simd_oprsz(desc);
893 intptr_t i;
894
895 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
896 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
897 if (r > INT8_MAX) {
898 r = INT8_MAX;
899 } else if (r < INT8_MIN) {
900 r = INT8_MIN;
901 }
902 *(int8_t *)(d + i) = r;
903 }
904 clear_high(d, oprsz, desc);
905 }
906
907 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
908 {
909 intptr_t oprsz = simd_oprsz(desc);
910 intptr_t i;
911
912 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
913 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
914 if (r > INT16_MAX) {
915 r = INT16_MAX;
916 } else if (r < INT16_MIN) {
917 r = INT16_MIN;
918 }
919 *(int16_t *)(d + i) = r;
920 }
921 clear_high(d, oprsz, desc);
922 }
923
924 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
925 {
926 intptr_t oprsz = simd_oprsz(desc);
927 intptr_t i;
928
929 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
930 int32_t ai = *(int32_t *)(a + i);
931 int32_t bi = *(int32_t *)(b + i);
932 int32_t di = ai + bi;
933 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
934 /* Signed overflow. */
935 di = (di < 0 ? INT32_MAX : INT32_MIN);
936 }
937 *(int32_t *)(d + i) = di;
938 }
939 clear_high(d, oprsz, desc);
940 }
941
942 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
943 {
944 intptr_t oprsz = simd_oprsz(desc);
945 intptr_t i;
946
947 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
948 int64_t ai = *(int64_t *)(a + i);
949 int64_t bi = *(int64_t *)(b + i);
950 int64_t di = ai + bi;
951 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
952 /* Signed overflow. */
953 di = (di < 0 ? INT64_MAX : INT64_MIN);
954 }
955 *(int64_t *)(d + i) = di;
956 }
957 clear_high(d, oprsz, desc);
958 }
959
960 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
961 {
962 intptr_t oprsz = simd_oprsz(desc);
963 intptr_t i;
964
965 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
966 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
967 if (r > INT8_MAX) {
968 r = INT8_MAX;
969 } else if (r < INT8_MIN) {
970 r = INT8_MIN;
971 }
972 *(uint8_t *)(d + i) = r;
973 }
974 clear_high(d, oprsz, desc);
975 }
976
977 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
978 {
979 intptr_t oprsz = simd_oprsz(desc);
980 intptr_t i;
981
982 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
983 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
984 if (r > INT16_MAX) {
985 r = INT16_MAX;
986 } else if (r < INT16_MIN) {
987 r = INT16_MIN;
988 }
989 *(int16_t *)(d + i) = r;
990 }
991 clear_high(d, oprsz, desc);
992 }
993
994 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
995 {
996 intptr_t oprsz = simd_oprsz(desc);
997 intptr_t i;
998
999 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1000 int32_t ai = *(int32_t *)(a + i);
1001 int32_t bi = *(int32_t *)(b + i);
1002 int32_t di = ai - bi;
1003 if (((di ^ ai) & (ai ^ bi)) < 0) {
1004 /* Signed overflow. */
1005 di = (di < 0 ? INT32_MAX : INT32_MIN);
1006 }
1007 *(int32_t *)(d + i) = di;
1008 }
1009 clear_high(d, oprsz, desc);
1010 }
1011
1012 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1013 {
1014 intptr_t oprsz = simd_oprsz(desc);
1015 intptr_t i;
1016
1017 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1018 int64_t ai = *(int64_t *)(a + i);
1019 int64_t bi = *(int64_t *)(b + i);
1020 int64_t di = ai - bi;
1021 if (((di ^ ai) & (ai ^ bi)) < 0) {
1022 /* Signed overflow. */
1023 di = (di < 0 ? INT64_MAX : INT64_MIN);
1024 }
1025 *(int64_t *)(d + i) = di;
1026 }
1027 clear_high(d, oprsz, desc);
1028 }
1029
1030 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1031 {
1032 intptr_t oprsz = simd_oprsz(desc);
1033 intptr_t i;
1034
1035 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1036 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1037 if (r > UINT8_MAX) {
1038 r = UINT8_MAX;
1039 }
1040 *(uint8_t *)(d + i) = r;
1041 }
1042 clear_high(d, oprsz, desc);
1043 }
1044
1045 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1046 {
1047 intptr_t oprsz = simd_oprsz(desc);
1048 intptr_t i;
1049
1050 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1051 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1052 if (r > UINT16_MAX) {
1053 r = UINT16_MAX;
1054 }
1055 *(uint16_t *)(d + i) = r;
1056 }
1057 clear_high(d, oprsz, desc);
1058 }
1059
1060 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1061 {
1062 intptr_t oprsz = simd_oprsz(desc);
1063 intptr_t i;
1064
1065 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1066 uint32_t ai = *(uint32_t *)(a + i);
1067 uint32_t bi = *(uint32_t *)(b + i);
1068 uint32_t di = ai + bi;
1069 if (di < ai) {
1070 di = UINT32_MAX;
1071 }
1072 *(uint32_t *)(d + i) = di;
1073 }
1074 clear_high(d, oprsz, desc);
1075 }
1076
1077 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1078 {
1079 intptr_t oprsz = simd_oprsz(desc);
1080 intptr_t i;
1081
1082 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1083 uint64_t ai = *(uint64_t *)(a + i);
1084 uint64_t bi = *(uint64_t *)(b + i);
1085 uint64_t di = ai + bi;
1086 if (di < ai) {
1087 di = UINT64_MAX;
1088 }
1089 *(uint64_t *)(d + i) = di;
1090 }
1091 clear_high(d, oprsz, desc);
1092 }
1093
1094 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1095 {
1096 intptr_t oprsz = simd_oprsz(desc);
1097 intptr_t i;
1098
1099 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1100 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1101 if (r < 0) {
1102 r = 0;
1103 }
1104 *(uint8_t *)(d + i) = r;
1105 }
1106 clear_high(d, oprsz, desc);
1107 }
1108
1109 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1110 {
1111 intptr_t oprsz = simd_oprsz(desc);
1112 intptr_t i;
1113
1114 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1115 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1116 if (r < 0) {
1117 r = 0;
1118 }
1119 *(uint16_t *)(d + i) = r;
1120 }
1121 clear_high(d, oprsz, desc);
1122 }
1123
1124 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1125 {
1126 intptr_t oprsz = simd_oprsz(desc);
1127 intptr_t i;
1128
1129 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1130 uint32_t ai = *(uint32_t *)(a + i);
1131 uint32_t bi = *(uint32_t *)(b + i);
1132 uint32_t di = ai - bi;
1133 if (ai < bi) {
1134 di = 0;
1135 }
1136 *(uint32_t *)(d + i) = di;
1137 }
1138 clear_high(d, oprsz, desc);
1139 }
1140
1141 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1142 {
1143 intptr_t oprsz = simd_oprsz(desc);
1144 intptr_t i;
1145
1146 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1147 uint64_t ai = *(uint64_t *)(a + i);
1148 uint64_t bi = *(uint64_t *)(b + i);
1149 uint64_t di = ai - bi;
1150 if (ai < bi) {
1151 di = 0;
1152 }
1153 *(uint64_t *)(d + i) = di;
1154 }
1155 clear_high(d, oprsz, desc);
1156 }
1157
1158 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1159 {
1160 intptr_t oprsz = simd_oprsz(desc);
1161 intptr_t i;
1162
1163 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1164 int8_t aa = *(int8_t *)(a + i);
1165 int8_t bb = *(int8_t *)(b + i);
1166 int8_t dd = aa < bb ? aa : bb;
1167 *(int8_t *)(d + i) = dd;
1168 }
1169 clear_high(d, oprsz, desc);
1170 }
1171
1172 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1173 {
1174 intptr_t oprsz = simd_oprsz(desc);
1175 intptr_t i;
1176
1177 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1178 int16_t aa = *(int16_t *)(a + i);
1179 int16_t bb = *(int16_t *)(b + i);
1180 int16_t dd = aa < bb ? aa : bb;
1181 *(int16_t *)(d + i) = dd;
1182 }
1183 clear_high(d, oprsz, desc);
1184 }
1185
1186 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1187 {
1188 intptr_t oprsz = simd_oprsz(desc);
1189 intptr_t i;
1190
1191 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1192 int32_t aa = *(int32_t *)(a + i);
1193 int32_t bb = *(int32_t *)(b + i);
1194 int32_t dd = aa < bb ? aa : bb;
1195 *(int32_t *)(d + i) = dd;
1196 }
1197 clear_high(d, oprsz, desc);
1198 }
1199
1200 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1201 {
1202 intptr_t oprsz = simd_oprsz(desc);
1203 intptr_t i;
1204
1205 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1206 int64_t aa = *(int64_t *)(a + i);
1207 int64_t bb = *(int64_t *)(b + i);
1208 int64_t dd = aa < bb ? aa : bb;
1209 *(int64_t *)(d + i) = dd;
1210 }
1211 clear_high(d, oprsz, desc);
1212 }
1213
1214 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1215 {
1216 intptr_t oprsz = simd_oprsz(desc);
1217 intptr_t i;
1218
1219 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1220 int8_t aa = *(int8_t *)(a + i);
1221 int8_t bb = *(int8_t *)(b + i);
1222 int8_t dd = aa > bb ? aa : bb;
1223 *(int8_t *)(d + i) = dd;
1224 }
1225 clear_high(d, oprsz, desc);
1226 }
1227
1228 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1229 {
1230 intptr_t oprsz = simd_oprsz(desc);
1231 intptr_t i;
1232
1233 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1234 int16_t aa = *(int16_t *)(a + i);
1235 int16_t bb = *(int16_t *)(b + i);
1236 int16_t dd = aa > bb ? aa : bb;
1237 *(int16_t *)(d + i) = dd;
1238 }
1239 clear_high(d, oprsz, desc);
1240 }
1241
1242 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1243 {
1244 intptr_t oprsz = simd_oprsz(desc);
1245 intptr_t i;
1246
1247 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1248 int32_t aa = *(int32_t *)(a + i);
1249 int32_t bb = *(int32_t *)(b + i);
1250 int32_t dd = aa > bb ? aa : bb;
1251 *(int32_t *)(d + i) = dd;
1252 }
1253 clear_high(d, oprsz, desc);
1254 }
1255
1256 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1257 {
1258 intptr_t oprsz = simd_oprsz(desc);
1259 intptr_t i;
1260
1261 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1262 int64_t aa = *(int64_t *)(a + i);
1263 int64_t bb = *(int64_t *)(b + i);
1264 int64_t dd = aa > bb ? aa : bb;
1265 *(int64_t *)(d + i) = dd;
1266 }
1267 clear_high(d, oprsz, desc);
1268 }
1269
1270 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1271 {
1272 intptr_t oprsz = simd_oprsz(desc);
1273 intptr_t i;
1274
1275 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1276 uint8_t aa = *(uint8_t *)(a + i);
1277 uint8_t bb = *(uint8_t *)(b + i);
1278 uint8_t dd = aa < bb ? aa : bb;
1279 *(uint8_t *)(d + i) = dd;
1280 }
1281 clear_high(d, oprsz, desc);
1282 }
1283
1284 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1285 {
1286 intptr_t oprsz = simd_oprsz(desc);
1287 intptr_t i;
1288
1289 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1290 uint16_t aa = *(uint16_t *)(a + i);
1291 uint16_t bb = *(uint16_t *)(b + i);
1292 uint16_t dd = aa < bb ? aa : bb;
1293 *(uint16_t *)(d + i) = dd;
1294 }
1295 clear_high(d, oprsz, desc);
1296 }
1297
1298 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1299 {
1300 intptr_t oprsz = simd_oprsz(desc);
1301 intptr_t i;
1302
1303 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1304 uint32_t aa = *(uint32_t *)(a + i);
1305 uint32_t bb = *(uint32_t *)(b + i);
1306 uint32_t dd = aa < bb ? aa : bb;
1307 *(uint32_t *)(d + i) = dd;
1308 }
1309 clear_high(d, oprsz, desc);
1310 }
1311
1312 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1313 {
1314 intptr_t oprsz = simd_oprsz(desc);
1315 intptr_t i;
1316
1317 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1318 uint64_t aa = *(uint64_t *)(a + i);
1319 uint64_t bb = *(uint64_t *)(b + i);
1320 uint64_t dd = aa < bb ? aa : bb;
1321 *(uint64_t *)(d + i) = dd;
1322 }
1323 clear_high(d, oprsz, desc);
1324 }
1325
1326 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1327 {
1328 intptr_t oprsz = simd_oprsz(desc);
1329 intptr_t i;
1330
1331 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1332 uint8_t aa = *(uint8_t *)(a + i);
1333 uint8_t bb = *(uint8_t *)(b + i);
1334 uint8_t dd = aa > bb ? aa : bb;
1335 *(uint8_t *)(d + i) = dd;
1336 }
1337 clear_high(d, oprsz, desc);
1338 }
1339
1340 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1341 {
1342 intptr_t oprsz = simd_oprsz(desc);
1343 intptr_t i;
1344
1345 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1346 uint16_t aa = *(uint16_t *)(a + i);
1347 uint16_t bb = *(uint16_t *)(b + i);
1348 uint16_t dd = aa > bb ? aa : bb;
1349 *(uint16_t *)(d + i) = dd;
1350 }
1351 clear_high(d, oprsz, desc);
1352 }
1353
1354 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1355 {
1356 intptr_t oprsz = simd_oprsz(desc);
1357 intptr_t i;
1358
1359 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1360 uint32_t aa = *(uint32_t *)(a + i);
1361 uint32_t bb = *(uint32_t *)(b + i);
1362 uint32_t dd = aa > bb ? aa : bb;
1363 *(uint32_t *)(d + i) = dd;
1364 }
1365 clear_high(d, oprsz, desc);
1366 }
1367
1368 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1369 {
1370 intptr_t oprsz = simd_oprsz(desc);
1371 intptr_t i;
1372
1373 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1374 uint64_t aa = *(uint64_t *)(a + i);
1375 uint64_t bb = *(uint64_t *)(b + i);
1376 uint64_t dd = aa > bb ? aa : bb;
1377 *(uint64_t *)(d + i) = dd;
1378 }
1379 clear_high(d, oprsz, desc);
1380 }
1381
1382 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1383 {
1384 intptr_t oprsz = simd_oprsz(desc);
1385 intptr_t i;
1386
1387 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1388 uint64_t aa = *(uint64_t *)(a + i);
1389 uint64_t bb = *(uint64_t *)(b + i);
1390 uint64_t cc = *(uint64_t *)(c + i);
1391 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1392 }
1393 clear_high(d, oprsz, desc);
1394 }