Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging
[qemu.git] / tcg / tcg.c
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27
28 #include "qemu/osdep.h"
29
/* Define to dump the ELF file used to communicate with GDB. */
31 #undef DEBUG_JIT
32
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42 CPU definitions. Currently they are used for qemu_ld/st
43 instructions */
44 #define NO_CPU_IO_DEFS
45
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS ELFCLASS32
51 #else
52 # define ELF_CLASS ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA ELFDATA2MSB
56 #else
57 # define ELF_DATA ELFDATA2LSB
58 #endif
59
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70 used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74 intptr_t value, intptr_t addend);
75
/*
 * Debug-frame CIE header.  Together with the FDE header defined after it,
 * this layout is shared by every host backend.  The leading length word
 * is aligned to the host pointer size.
 */
typedef struct {
    uint32_t len __attribute__((aligned(sizeof(void *))));  /* length word */
    uint32_t id;                    /* CIE identifier */
    uint8_t version;                /* CIE version */
    char augmentation[1];           /* augmentation string (one byte) */
    uint8_t code_align;             /* code alignment factor */
    uint8_t data_align;             /* data alignment factor */
    uint8_t return_column;          /* return address column */
} DebugFrameCIE;
86
87 typedef struct QEMU_PACKED {
88 uint32_t len __attribute__((aligned((sizeof(void *)))));
89 uint32_t cie_offset;
90 uintptr_t func_start;
91 uintptr_t func_len;
92 } DebugFrameFDEHeader;
93
94 typedef struct QEMU_PACKED {
95 DebugFrameCIE cie;
96 DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98
99 static void tcg_register_jit_int(const void *buf, size_t size,
100 const void *debug_frame,
101 size_t debug_frame_size)
102 __attribute__((unused));
103
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106 intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109 TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111 const TCGArg args[TCG_MAX_OP_ARGS],
112 const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121 unsigned vecl, unsigned vece,
122 const TCGArg args[TCG_MAX_OP_ARGS],
123 const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126 TCGReg dst, TCGReg src)
127 {
128 g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131 TCGReg dst, TCGReg base, intptr_t offset)
132 {
133 g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136 TCGReg dst, int64_t arg)
137 {
138 g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141 unsigned vecl, unsigned vece,
142 const TCGArg args[TCG_MAX_OP_ARGS],
143 const int const_args[TCG_MAX_OP_ARGS])
144 {
145 g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149 intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154 ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162
/* Template context, filled in by tcg_context_init(). */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context used by the current thread. */
__thread TCGContext *tcg_ctx;

/* Array of registered contexts, with its current and maximum entry counts. */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
/* Handle for the "env" global; assigned at the end of tcg_context_init(). */
TCGv_env cpu_env = 0;
/* Asserted non-NULL by tcg_prologue_init() on non-interpreter builds. */
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the rw/rx split-mapping displacement - confirm. */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
/* Entry point of the generated prologue; set by tcg_prologue_init(). */
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Register sets indexed by type, and the set tested as call-clobbered. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
179
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183 *s->code_ptr++ = v;
184 }
185
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187 uint8_t v)
188 {
189 *p = v;
190 }
191 #endif
192
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197 *s->code_ptr++ = v;
198 } else {
199 tcg_insn_unit *p = s->code_ptr;
200 memcpy(p, &v, sizeof(v));
201 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202 }
203 }
204
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206 uint16_t v)
207 {
208 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209 *p = v;
210 } else {
211 memcpy(p, &v, sizeof(v));
212 }
213 }
214 #endif
215
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220 *s->code_ptr++ = v;
221 } else {
222 tcg_insn_unit *p = s->code_ptr;
223 memcpy(p, &v, sizeof(v));
224 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225 }
226 }
227
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229 uint32_t v)
230 {
231 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232 *p = v;
233 } else {
234 memcpy(p, &v, sizeof(v));
235 }
236 }
237 #endif
238
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243 *s->code_ptr++ = v;
244 } else {
245 tcg_insn_unit *p = s->code_ptr;
246 memcpy(p, &v, sizeof(v));
247 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248 }
249 }
250
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252 uint64_t v)
253 {
254 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255 *p = v;
256 } else {
257 memcpy(p, &v, sizeof(v));
258 }
259 }
260 #endif
261
262 /* label relocation processing */
263
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265 TCGLabel *l, intptr_t addend)
266 {
267 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268
269 r->type = type;
270 r->ptr = code_ptr;
271 r->addend = addend;
272 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277 tcg_debug_assert(!l->has_value);
278 l->has_value = 1;
279 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281
282 TCGLabel *gen_new_label(void)
283 {
284 TCGContext *s = tcg_ctx;
285 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286
287 memset(l, 0, sizeof(TCGLabel));
288 l->id = s->nb_labels++;
289 QSIMPLEQ_INIT(&l->relocs);
290
291 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292
293 return l;
294 }
295
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298 TCGLabel *l;
299
300 QSIMPLEQ_FOREACH(l, &s->labels, next) {
301 TCGRelocation *r;
302 uintptr_t value = l->u.value;
303
304 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306 return false;
307 }
308 }
309 }
310 return true;
311 }
312
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315 /*
316 * We will check for overflow at the end of the opcode loop in
317 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318 */
319 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321
322 /* Signal overflow, starting over with fewer guest insns. */
323 static G_NORETURN
324 void tcg_raise_tb_overflow(TCGContext *s)
325 {
326 siglongjmp(s->jmp_trans, -2);
327 }
328
/*
 * Token-pasting helpers: glue the prefix P onto one to six names, with
 * an underscore between consecutive names, to form a single identifier.
 */
#define C_PFX1(P, A) P##A
#define C_PFX2(P, A, B) P##A##_##B
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
335
/*
 * Define an enumeration for the various combinations.
 *
 * In this pass each C_Ox_Iy()/C_N1_I2() entry of tcg-target-con-set.h
 * expands to one enumerator followed by a comma, so including the header
 * inside the enum body yields one enumerator per constraint set.
 */

#define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
358
359 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
360
361 #undef C_O0_I1
362 #undef C_O0_I2
363 #undef C_O0_I3
364 #undef C_O0_I4
365 #undef C_O1_I1
366 #undef C_O1_I2
367 #undef C_O1_I3
368 #undef C_O1_I4
369 #undef C_N1_I2
370 #undef C_O2_I1
371 #undef C_O2_I2
372 #undef C_O2_I3
373 #undef C_O2_I4
374
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * In this pass each entry expands to an initializer whose args_ct_str
 * holds the stringified constraint names, outputs first.  C_N1_I2
 * additionally prefixes its output constraint with "&".
 */

#define C_O0_I1(I1) { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
397
398
399 #undef C_O0_I1
400 #undef C_O0_I2
401 #undef C_O0_I3
402 #undef C_O0_I4
403 #undef C_O1_I1
404 #undef C_O1_I2
405 #undef C_O1_I3
406 #undef C_O1_I4
407 #undef C_N1_I2
408 #undef C_O2_I1
409 #undef C_O2_I2
410 #undef C_O2_I3
411 #undef C_O2_I4
412
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * These final definitions produce the bare enumerator name, with no
 * trailing comma, so the macros can be used as expressions in the code
 * included after them.
 */

#define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
431
432 #include "tcg-target.c.inc"
433
434 static void alloc_tcg_plugin_context(TCGContext *s)
435 {
436 #ifdef CONFIG_PLUGIN
437 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
438 s->plugin_tb->insns =
439 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
440 #endif
441 }
442
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is no longer used for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
458 #ifdef CONFIG_USER_ONLY
459 void tcg_register_thread(void)
460 {
461 tcg_ctx = &tcg_init_ctx;
462 }
463 #else
464 void tcg_register_thread(void)
465 {
466 TCGContext *s = g_malloc(sizeof(*s));
467 unsigned int i, n;
468
469 *s = tcg_init_ctx;
470
471 /* Relink mem_base. */
472 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
473 if (tcg_init_ctx.temps[i].mem_base) {
474 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
475 tcg_debug_assert(b >= 0 && b < n);
476 s->temps[i].mem_base = &s->temps[b];
477 }
478 }
479
480 /* Claim an entry in tcg_ctxs */
481 n = qatomic_fetch_inc(&tcg_cur_ctxs);
482 g_assert(n < tcg_max_ctxs);
483 qatomic_set(&tcg_ctxs[n], s);
484
485 if (n > 0) {
486 alloc_tcg_plugin_context(s);
487 tcg_region_initial_alloc(s);
488 }
489
490 tcg_ctx = s;
491 }
492 #endif /* !CONFIG_USER_ONLY */
493
494 /* pool based memory allocation */
495 void *tcg_malloc_internal(TCGContext *s, int size)
496 {
497 TCGPool *p;
498 int pool_size;
499
500 if (size > TCG_POOL_CHUNK_SIZE) {
501 /* big malloc: insert a new pool (XXX: could optimize) */
502 p = g_malloc(sizeof(TCGPool) + size);
503 p->size = size;
504 p->next = s->pool_first_large;
505 s->pool_first_large = p;
506 return p->data;
507 } else {
508 p = s->pool_current;
509 if (!p) {
510 p = s->pool_first;
511 if (!p)
512 goto new_pool;
513 } else {
514 if (!p->next) {
515 new_pool:
516 pool_size = TCG_POOL_CHUNK_SIZE;
517 p = g_malloc(sizeof(TCGPool) + pool_size);
518 p->size = pool_size;
519 p->next = NULL;
520 if (s->pool_current)
521 s->pool_current->next = p;
522 else
523 s->pool_first = p;
524 } else {
525 p = p->next;
526 }
527 }
528 }
529 s->pool_current = p;
530 s->pool_cur = p->data + size;
531 s->pool_end = p->data + p->size;
532 return p->data;
533 }
534
535 void tcg_pool_reset(TCGContext *s)
536 {
537 TCGPool *p, *t;
538 for (p = s->pool_first_large; p; p = t) {
539 t = p->next;
540 g_free(p);
541 }
542 s->pool_first_large = NULL;
543 s->pool_cur = s->pool_end = NULL;
544 s->pool_current = NULL;
545 }
546
547 #include "exec/helper-proto.h"
548
/* Descriptor table for every helper, generated by exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its entry in all_helpers[]. */
static GHashTable *helper_table;
553
554 #ifdef CONFIG_TCG_INTERPRETER
/* Maps a helper typemask to the prepared ffi_cif for that signature. */
static GHashTable *ffi_table;

/* Translation from a 3-bit dh_typecode_* value to the libffi type. */
static ffi_type * const typecode_to_ffi[8] = {
[dh_typecode_void] = &ffi_type_void,
[dh_typecode_i32] = &ffi_type_uint32,
[dh_typecode_s32] = &ffi_type_sint32,
[dh_typecode_i64] = &ffi_type_uint64,
[dh_typecode_s64] = &ffi_type_sint64,
[dh_typecode_ptr] = &ffi_type_pointer,
};
565 #endif
566
/*
 * Variant of tcg_target_reg_alloc_order filled in by tcg_context_init():
 * the leading run of registers that are not call-clobbered is reversed.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
568 static void process_op_defs(TCGContext *s);
569 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
570 TCGReg reg, const char *name);
571
572 static void tcg_context_init(unsigned max_cpus)
573 {
574 TCGContext *s = &tcg_init_ctx;
575 int op, total_args, n, i;
576 TCGOpDef *def;
577 TCGArgConstraint *args_ct;
578 TCGTemp *ts;
579
580 memset(s, 0, sizeof(*s));
581 s->nb_globals = 0;
582
583 /* Count total number of arguments and allocate the corresponding
584 space */
585 total_args = 0;
586 for(op = 0; op < NB_OPS; op++) {
587 def = &tcg_op_defs[op];
588 n = def->nb_iargs + def->nb_oargs;
589 total_args += n;
590 }
591
592 args_ct = g_new0(TCGArgConstraint, total_args);
593
594 for(op = 0; op < NB_OPS; op++) {
595 def = &tcg_op_defs[op];
596 def->args_ct = args_ct;
597 n = def->nb_iargs + def->nb_oargs;
598 args_ct += n;
599 }
600
601 /* Register helpers. */
602 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
603 helper_table = g_hash_table_new(NULL, NULL);
604
605 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
606 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
607 (gpointer)&all_helpers[i]);
608 }
609
610 #ifdef CONFIG_TCG_INTERPRETER
611 /* g_direct_hash/equal for direct comparisons on uint32_t. */
612 ffi_table = g_hash_table_new(NULL, NULL);
613 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
614 struct {
615 ffi_cif cif;
616 ffi_type *args[];
617 } *ca;
618 uint32_t typemask = all_helpers[i].typemask;
619 gpointer hash = (gpointer)(uintptr_t)typemask;
620 ffi_status status;
621 int nargs;
622
623 if (g_hash_table_lookup(ffi_table, hash)) {
624 continue;
625 }
626
627 /* Ignoring the return type, find the last non-zero field. */
628 nargs = 32 - clz32(typemask >> 3);
629 nargs = DIV_ROUND_UP(nargs, 3);
630
631 ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
632 ca->cif.rtype = typecode_to_ffi[typemask & 7];
633 ca->cif.nargs = nargs;
634
635 if (nargs != 0) {
636 ca->cif.arg_types = ca->args;
637 for (i = 0; i < nargs; ++i) {
638 int typecode = extract32(typemask, (i + 1) * 3, 3);
639 ca->args[i] = typecode_to_ffi[typecode];
640 }
641 }
642
643 status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
644 ca->cif.rtype, ca->cif.arg_types);
645 assert(status == FFI_OK);
646
647 g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
648 }
649 #endif
650
651 tcg_target_init(s);
652 process_op_defs(s);
653
654 /* Reverse the order of the saved registers, assuming they're all at
655 the start of tcg_target_reg_alloc_order. */
656 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
657 int r = tcg_target_reg_alloc_order[n];
658 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
659 break;
660 }
661 }
662 for (i = 0; i < n; ++i) {
663 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
664 }
665 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
666 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
667 }
668
669 alloc_tcg_plugin_context(s);
670
671 tcg_ctx = s;
672 /*
673 * In user-mode we simply share the init context among threads, since we
674 * use a single region. See the documentation tcg_region_init() for the
675 * reasoning behind this.
676 * In softmmu we will have at most max_cpus TCG threads.
677 */
678 #ifdef CONFIG_USER_ONLY
679 tcg_ctxs = &tcg_ctx;
680 tcg_cur_ctxs = 1;
681 tcg_max_ctxs = 1;
682 #else
683 tcg_max_ctxs = max_cpus;
684 tcg_ctxs = g_new0(TCGContext *, max_cpus);
685 #endif
686
687 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
688 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
689 cpu_env = temp_tcgv_ptr(ts);
690 }
691
/*
 * Public entry point: initialise the TCG context first, then the code
 * regions.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
697
698 /*
699 * Allocate TBs right before their corresponding translated code, making
700 * sure that TBs and code are on different cache lines.
701 */
702 TranslationBlock *tcg_tb_alloc(TCGContext *s)
703 {
704 uintptr_t align = qemu_icache_linesize;
705 TranslationBlock *tb;
706 void *next;
707
708 retry:
709 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
710 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
711
712 if (unlikely(next > s->code_gen_highwater)) {
713 if (tcg_region_alloc(s)) {
714 return NULL;
715 }
716 goto retry;
717 }
718 qatomic_set(&s->code_gen_ptr, next);
719 s->data_gen_ptr = NULL;
720 return tb;
721 }
722
/*
 * Generate the host prologue at the start of the code buffer of S.
 *
 * Points the output pointers at code_gen_ptr, lets the backend emit the
 * prologue, finalizes any constant pool, flushes the caches over the
 * emitted range (non-interpreter builds), optionally logs a disassembly,
 * and finally hands the context to tcg_region_prologue_set().
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* Emit from the very start of the current code buffer. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue is the entry point used to execute TBs. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code runs up to data_gen_ptr; the rest is dumped as data. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                /* Dump the data one tcg_target_ulong at a time. */
                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
803
804 void tcg_func_start(TCGContext *s)
805 {
806 tcg_pool_reset(s);
807 s->nb_temps = s->nb_globals;
808
809 /* No temps have been previously allocated for size or locality. */
810 memset(s->free_temps, 0, sizeof(s->free_temps));
811
812 /* No constant temps have been previously allocated. */
813 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
814 if (s->const_table[i]) {
815 g_hash_table_remove_all(s->const_table[i]);
816 }
817 }
818
819 s->nb_ops = 0;
820 s->nb_labels = 0;
821 s->current_frame_offset = s->frame_start;
822
823 #ifdef CONFIG_DEBUG_TCG
824 s->goto_tb_issue_mask = 0;
825 #endif
826
827 QTAILQ_INIT(&s->ops);
828 QTAILQ_INIT(&s->free_ops);
829 QSIMPLEQ_INIT(&s->labels);
830 }
831
832 static TCGTemp *tcg_temp_alloc(TCGContext *s)
833 {
834 int n = s->nb_temps++;
835
836 if (n >= TCG_MAX_TEMPS) {
837 tcg_raise_tb_overflow(s);
838 }
839 return memset(&s->temps[n], 0, sizeof(TCGTemp));
840 }
841
842 static TCGTemp *tcg_global_alloc(TCGContext *s)
843 {
844 TCGTemp *ts;
845
846 tcg_debug_assert(s->nb_globals == s->nb_temps);
847 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
848 s->nb_globals++;
849 ts = tcg_temp_alloc(s);
850 ts->kind = TEMP_GLOBAL;
851
852 return ts;
853 }
854
855 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
856 TCGReg reg, const char *name)
857 {
858 TCGTemp *ts;
859
860 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
861 tcg_abort();
862 }
863
864 ts = tcg_global_alloc(s);
865 ts->base_type = type;
866 ts->type = type;
867 ts->kind = TEMP_FIXED;
868 ts->reg = reg;
869 ts->name = name;
870 tcg_regset_set_reg(s->reserved_regs, reg);
871
872 return ts;
873 }
874
875 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
876 {
877 s->frame_start = start;
878 s->frame_end = start + size;
879 s->frame_temp
880 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
881 }
882
883 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
884 intptr_t offset, const char *name)
885 {
886 TCGContext *s = tcg_ctx;
887 TCGTemp *base_ts = tcgv_ptr_temp(base);
888 TCGTemp *ts = tcg_global_alloc(s);
889 int indirect_reg = 0, bigendian = 0;
890 #if HOST_BIG_ENDIAN
891 bigendian = 1;
892 #endif
893
894 switch (base_ts->kind) {
895 case TEMP_FIXED:
896 break;
897 case TEMP_GLOBAL:
898 /* We do not support double-indirect registers. */
899 tcg_debug_assert(!base_ts->indirect_reg);
900 base_ts->indirect_base = 1;
901 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
902 ? 2 : 1);
903 indirect_reg = 1;
904 break;
905 default:
906 g_assert_not_reached();
907 }
908
909 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
910 TCGTemp *ts2 = tcg_global_alloc(s);
911 char buf[64];
912
913 ts->base_type = TCG_TYPE_I64;
914 ts->type = TCG_TYPE_I32;
915 ts->indirect_reg = indirect_reg;
916 ts->mem_allocated = 1;
917 ts->mem_base = base_ts;
918 ts->mem_offset = offset + bigendian * 4;
919 pstrcpy(buf, sizeof(buf), name);
920 pstrcat(buf, sizeof(buf), "_0");
921 ts->name = strdup(buf);
922
923 tcg_debug_assert(ts2 == ts + 1);
924 ts2->base_type = TCG_TYPE_I64;
925 ts2->type = TCG_TYPE_I32;
926 ts2->indirect_reg = indirect_reg;
927 ts2->mem_allocated = 1;
928 ts2->mem_base = base_ts;
929 ts2->mem_offset = offset + (1 - bigendian) * 4;
930 pstrcpy(buf, sizeof(buf), name);
931 pstrcat(buf, sizeof(buf), "_1");
932 ts2->name = strdup(buf);
933 } else {
934 ts->base_type = type;
935 ts->type = type;
936 ts->indirect_reg = indirect_reg;
937 ts->mem_allocated = 1;
938 ts->mem_base = base_ts;
939 ts->mem_offset = offset;
940 ts->name = name;
941 }
942 return ts;
943 }
944
945 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
946 {
947 TCGContext *s = tcg_ctx;
948 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
949 TCGTemp *ts;
950 int idx, k;
951
952 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
953 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
954 if (idx < TCG_MAX_TEMPS) {
955 /* There is already an available temp with the right type. */
956 clear_bit(idx, s->free_temps[k].l);
957
958 ts = &s->temps[idx];
959 ts->temp_allocated = 1;
960 tcg_debug_assert(ts->base_type == type);
961 tcg_debug_assert(ts->kind == kind);
962 } else {
963 ts = tcg_temp_alloc(s);
964 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
965 TCGTemp *ts2 = tcg_temp_alloc(s);
966
967 ts->base_type = type;
968 ts->type = TCG_TYPE_I32;
969 ts->temp_allocated = 1;
970 ts->kind = kind;
971
972 tcg_debug_assert(ts2 == ts + 1);
973 ts2->base_type = TCG_TYPE_I64;
974 ts2->type = TCG_TYPE_I32;
975 ts2->temp_allocated = 1;
976 ts2->kind = kind;
977 } else {
978 ts->base_type = type;
979 ts->type = type;
980 ts->temp_allocated = 1;
981 ts->kind = kind;
982 }
983 }
984
985 #if defined(CONFIG_DEBUG_TCG)
986 s->temps_in_use++;
987 #endif
988 return ts;
989 }
990
991 TCGv_vec tcg_temp_new_vec(TCGType type)
992 {
993 TCGTemp *t;
994
995 #ifdef CONFIG_DEBUG_TCG
996 switch (type) {
997 case TCG_TYPE_V64:
998 assert(TCG_TARGET_HAS_v64);
999 break;
1000 case TCG_TYPE_V128:
1001 assert(TCG_TARGET_HAS_v128);
1002 break;
1003 case TCG_TYPE_V256:
1004 assert(TCG_TARGET_HAS_v256);
1005 break;
1006 default:
1007 g_assert_not_reached();
1008 }
1009 #endif
1010
1011 t = tcg_temp_new_internal(type, 0);
1012 return temp_tcgv_vec(t);
1013 }
1014
1015 /* Create a new temp of the same type as an existing temp. */
1016 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1017 {
1018 TCGTemp *t = tcgv_vec_temp(match);
1019
1020 tcg_debug_assert(t->temp_allocated != 0);
1021
1022 t = tcg_temp_new_internal(t->base_type, 0);
1023 return temp_tcgv_vec(t);
1024 }
1025
1026 void tcg_temp_free_internal(TCGTemp *ts)
1027 {
1028 TCGContext *s = tcg_ctx;
1029 int k, idx;
1030
1031 switch (ts->kind) {
1032 case TEMP_CONST:
1033 /*
1034 * In order to simplify users of tcg_constant_*,
1035 * silently ignore free.
1036 */
1037 return;
1038 case TEMP_NORMAL:
1039 case TEMP_LOCAL:
1040 break;
1041 default:
1042 g_assert_not_reached();
1043 }
1044
1045 #if defined(CONFIG_DEBUG_TCG)
1046 s->temps_in_use--;
1047 if (s->temps_in_use < 0) {
1048 fprintf(stderr, "More temporaries freed than allocated!\n");
1049 }
1050 #endif
1051
1052 tcg_debug_assert(ts->temp_allocated != 0);
1053 ts->temp_allocated = 0;
1054
1055 idx = temp_idx(ts);
1056 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1057 set_bit(idx, s->free_temps[k].l);
1058 }
1059
1060 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1061 {
1062 TCGContext *s = tcg_ctx;
1063 GHashTable *h = s->const_table[type];
1064 TCGTemp *ts;
1065
1066 if (h == NULL) {
1067 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1068 s->const_table[type] = h;
1069 }
1070
1071 ts = g_hash_table_lookup(h, &val);
1072 if (ts == NULL) {
1073 ts = tcg_temp_alloc(s);
1074
1075 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1076 TCGTemp *ts2 = tcg_temp_alloc(s);
1077
1078 ts->base_type = TCG_TYPE_I64;
1079 ts->type = TCG_TYPE_I32;
1080 ts->kind = TEMP_CONST;
1081 ts->temp_allocated = 1;
1082 /*
1083 * Retain the full value of the 64-bit constant in the low
1084 * part, so that the hash table works. Actual uses will
1085 * truncate the value to the low part.
1086 */
1087 ts->val = val;
1088
1089 tcg_debug_assert(ts2 == ts + 1);
1090 ts2->base_type = TCG_TYPE_I64;
1091 ts2->type = TCG_TYPE_I32;
1092 ts2->kind = TEMP_CONST;
1093 ts2->temp_allocated = 1;
1094 ts2->val = val >> 32;
1095 } else {
1096 ts->base_type = type;
1097 ts->type = type;
1098 ts->kind = TEMP_CONST;
1099 ts->temp_allocated = 1;
1100 ts->val = val;
1101 }
1102 g_hash_table_insert(h, &ts->val, ts);
1103 }
1104
1105 return ts;
1106 }
1107
1108 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1109 {
1110 val = dup_const(vece, val);
1111 return temp_tcgv_vec(tcg_constant_internal(type, val));
1112 }
1113
1114 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1115 {
1116 TCGTemp *t = tcgv_vec_temp(match);
1117
1118 tcg_debug_assert(t->temp_allocated != 0);
1119 return tcg_constant_vec(t->base_type, vece, val);
1120 }
1121
1122 TCGv_i32 tcg_const_i32(int32_t val)
1123 {
1124 TCGv_i32 t0;
1125 t0 = tcg_temp_new_i32();
1126 tcg_gen_movi_i32(t0, val);
1127 return t0;
1128 }
1129
1130 TCGv_i64 tcg_const_i64(int64_t val)
1131 {
1132 TCGv_i64 t0;
1133 t0 = tcg_temp_new_i64();
1134 tcg_gen_movi_i64(t0, val);
1135 return t0;
1136 }
1137
1138 TCGv_i32 tcg_const_local_i32(int32_t val)
1139 {
1140 TCGv_i32 t0;
1141 t0 = tcg_temp_local_new_i32();
1142 tcg_gen_movi_i32(t0, val);
1143 return t0;
1144 }
1145
1146 TCGv_i64 tcg_const_local_i64(int64_t val)
1147 {
1148 TCGv_i64 t0;
1149 t0 = tcg_temp_local_new_i64();
1150 tcg_gen_movi_i64(t0, val);
1151 return t0;
1152 }
1153
1154 #if defined(CONFIG_DEBUG_TCG)
1155 void tcg_clear_temp_count(void)
1156 {
1157 TCGContext *s = tcg_ctx;
1158 s->temps_in_use = 0;
1159 }
1160
1161 int tcg_check_temp_count(void)
1162 {
1163 TCGContext *s = tcg_ctx;
1164 if (s->temps_in_use) {
1165 /* Clear the count so that we don't give another
1166 * warning immediately next time around.
1167 */
1168 s->temps_in_use = 0;
1169 return 1;
1170 }
1171 return 0;
1172 }
1173 #endif
1174
1175 /* Return true if OP may appear in the opcode stream.
1176 Test the runtime variable that controls each opcode. */
1177 bool tcg_op_supported(TCGOpcode op)
1178 {
1179 const bool have_vec
1180 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1181
1182 switch (op) {
1183 case INDEX_op_discard:
1184 case INDEX_op_set_label:
1185 case INDEX_op_call:
1186 case INDEX_op_br:
1187 case INDEX_op_mb:
1188 case INDEX_op_insn_start:
1189 case INDEX_op_exit_tb:
1190 case INDEX_op_goto_tb:
1191 case INDEX_op_goto_ptr:
1192 case INDEX_op_qemu_ld_i32:
1193 case INDEX_op_qemu_st_i32:
1194 case INDEX_op_qemu_ld_i64:
1195 case INDEX_op_qemu_st_i64:
1196 return true;
1197
1198 case INDEX_op_qemu_st8_i32:
1199 return TCG_TARGET_HAS_qemu_st8_i32;
1200
1201 case INDEX_op_mov_i32:
1202 case INDEX_op_setcond_i32:
1203 case INDEX_op_brcond_i32:
1204 case INDEX_op_ld8u_i32:
1205 case INDEX_op_ld8s_i32:
1206 case INDEX_op_ld16u_i32:
1207 case INDEX_op_ld16s_i32:
1208 case INDEX_op_ld_i32:
1209 case INDEX_op_st8_i32:
1210 case INDEX_op_st16_i32:
1211 case INDEX_op_st_i32:
1212 case INDEX_op_add_i32:
1213 case INDEX_op_sub_i32:
1214 case INDEX_op_mul_i32:
1215 case INDEX_op_and_i32:
1216 case INDEX_op_or_i32:
1217 case INDEX_op_xor_i32:
1218 case INDEX_op_shl_i32:
1219 case INDEX_op_shr_i32:
1220 case INDEX_op_sar_i32:
1221 return true;
1222
1223 case INDEX_op_movcond_i32:
1224 return TCG_TARGET_HAS_movcond_i32;
1225 case INDEX_op_div_i32:
1226 case INDEX_op_divu_i32:
1227 return TCG_TARGET_HAS_div_i32;
1228 case INDEX_op_rem_i32:
1229 case INDEX_op_remu_i32:
1230 return TCG_TARGET_HAS_rem_i32;
1231 case INDEX_op_div2_i32:
1232 case INDEX_op_divu2_i32:
1233 return TCG_TARGET_HAS_div2_i32;
1234 case INDEX_op_rotl_i32:
1235 case INDEX_op_rotr_i32:
1236 return TCG_TARGET_HAS_rot_i32;
1237 case INDEX_op_deposit_i32:
1238 return TCG_TARGET_HAS_deposit_i32;
1239 case INDEX_op_extract_i32:
1240 return TCG_TARGET_HAS_extract_i32;
1241 case INDEX_op_sextract_i32:
1242 return TCG_TARGET_HAS_sextract_i32;
1243 case INDEX_op_extract2_i32:
1244 return TCG_TARGET_HAS_extract2_i32;
1245 case INDEX_op_add2_i32:
1246 return TCG_TARGET_HAS_add2_i32;
1247 case INDEX_op_sub2_i32:
1248 return TCG_TARGET_HAS_sub2_i32;
1249 case INDEX_op_mulu2_i32:
1250 return TCG_TARGET_HAS_mulu2_i32;
1251 case INDEX_op_muls2_i32:
1252 return TCG_TARGET_HAS_muls2_i32;
1253 case INDEX_op_muluh_i32:
1254 return TCG_TARGET_HAS_muluh_i32;
1255 case INDEX_op_mulsh_i32:
1256 return TCG_TARGET_HAS_mulsh_i32;
1257 case INDEX_op_ext8s_i32:
1258 return TCG_TARGET_HAS_ext8s_i32;
1259 case INDEX_op_ext16s_i32:
1260 return TCG_TARGET_HAS_ext16s_i32;
1261 case INDEX_op_ext8u_i32:
1262 return TCG_TARGET_HAS_ext8u_i32;
1263 case INDEX_op_ext16u_i32:
1264 return TCG_TARGET_HAS_ext16u_i32;
1265 case INDEX_op_bswap16_i32:
1266 return TCG_TARGET_HAS_bswap16_i32;
1267 case INDEX_op_bswap32_i32:
1268 return TCG_TARGET_HAS_bswap32_i32;
1269 case INDEX_op_not_i32:
1270 return TCG_TARGET_HAS_not_i32;
1271 case INDEX_op_neg_i32:
1272 return TCG_TARGET_HAS_neg_i32;
1273 case INDEX_op_andc_i32:
1274 return TCG_TARGET_HAS_andc_i32;
1275 case INDEX_op_orc_i32:
1276 return TCG_TARGET_HAS_orc_i32;
1277 case INDEX_op_eqv_i32:
1278 return TCG_TARGET_HAS_eqv_i32;
1279 case INDEX_op_nand_i32:
1280 return TCG_TARGET_HAS_nand_i32;
1281 case INDEX_op_nor_i32:
1282 return TCG_TARGET_HAS_nor_i32;
1283 case INDEX_op_clz_i32:
1284 return TCG_TARGET_HAS_clz_i32;
1285 case INDEX_op_ctz_i32:
1286 return TCG_TARGET_HAS_ctz_i32;
1287 case INDEX_op_ctpop_i32:
1288 return TCG_TARGET_HAS_ctpop_i32;
1289
1290 case INDEX_op_brcond2_i32:
1291 case INDEX_op_setcond2_i32:
1292 return TCG_TARGET_REG_BITS == 32;
1293
1294 case INDEX_op_mov_i64:
1295 case INDEX_op_setcond_i64:
1296 case INDEX_op_brcond_i64:
1297 case INDEX_op_ld8u_i64:
1298 case INDEX_op_ld8s_i64:
1299 case INDEX_op_ld16u_i64:
1300 case INDEX_op_ld16s_i64:
1301 case INDEX_op_ld32u_i64:
1302 case INDEX_op_ld32s_i64:
1303 case INDEX_op_ld_i64:
1304 case INDEX_op_st8_i64:
1305 case INDEX_op_st16_i64:
1306 case INDEX_op_st32_i64:
1307 case INDEX_op_st_i64:
1308 case INDEX_op_add_i64:
1309 case INDEX_op_sub_i64:
1310 case INDEX_op_mul_i64:
1311 case INDEX_op_and_i64:
1312 case INDEX_op_or_i64:
1313 case INDEX_op_xor_i64:
1314 case INDEX_op_shl_i64:
1315 case INDEX_op_shr_i64:
1316 case INDEX_op_sar_i64:
1317 case INDEX_op_ext_i32_i64:
1318 case INDEX_op_extu_i32_i64:
1319 return TCG_TARGET_REG_BITS == 64;
1320
1321 case INDEX_op_movcond_i64:
1322 return TCG_TARGET_HAS_movcond_i64;
1323 case INDEX_op_div_i64:
1324 case INDEX_op_divu_i64:
1325 return TCG_TARGET_HAS_div_i64;
1326 case INDEX_op_rem_i64:
1327 case INDEX_op_remu_i64:
1328 return TCG_TARGET_HAS_rem_i64;
1329 case INDEX_op_div2_i64:
1330 case INDEX_op_divu2_i64:
1331 return TCG_TARGET_HAS_div2_i64;
1332 case INDEX_op_rotl_i64:
1333 case INDEX_op_rotr_i64:
1334 return TCG_TARGET_HAS_rot_i64;
1335 case INDEX_op_deposit_i64:
1336 return TCG_TARGET_HAS_deposit_i64;
1337 case INDEX_op_extract_i64:
1338 return TCG_TARGET_HAS_extract_i64;
1339 case INDEX_op_sextract_i64:
1340 return TCG_TARGET_HAS_sextract_i64;
1341 case INDEX_op_extract2_i64:
1342 return TCG_TARGET_HAS_extract2_i64;
1343 case INDEX_op_extrl_i64_i32:
1344 return TCG_TARGET_HAS_extrl_i64_i32;
1345 case INDEX_op_extrh_i64_i32:
1346 return TCG_TARGET_HAS_extrh_i64_i32;
1347 case INDEX_op_ext8s_i64:
1348 return TCG_TARGET_HAS_ext8s_i64;
1349 case INDEX_op_ext16s_i64:
1350 return TCG_TARGET_HAS_ext16s_i64;
1351 case INDEX_op_ext32s_i64:
1352 return TCG_TARGET_HAS_ext32s_i64;
1353 case INDEX_op_ext8u_i64:
1354 return TCG_TARGET_HAS_ext8u_i64;
1355 case INDEX_op_ext16u_i64:
1356 return TCG_TARGET_HAS_ext16u_i64;
1357 case INDEX_op_ext32u_i64:
1358 return TCG_TARGET_HAS_ext32u_i64;
1359 case INDEX_op_bswap16_i64:
1360 return TCG_TARGET_HAS_bswap16_i64;
1361 case INDEX_op_bswap32_i64:
1362 return TCG_TARGET_HAS_bswap32_i64;
1363 case INDEX_op_bswap64_i64:
1364 return TCG_TARGET_HAS_bswap64_i64;
1365 case INDEX_op_not_i64:
1366 return TCG_TARGET_HAS_not_i64;
1367 case INDEX_op_neg_i64:
1368 return TCG_TARGET_HAS_neg_i64;
1369 case INDEX_op_andc_i64:
1370 return TCG_TARGET_HAS_andc_i64;
1371 case INDEX_op_orc_i64:
1372 return TCG_TARGET_HAS_orc_i64;
1373 case INDEX_op_eqv_i64:
1374 return TCG_TARGET_HAS_eqv_i64;
1375 case INDEX_op_nand_i64:
1376 return TCG_TARGET_HAS_nand_i64;
1377 case INDEX_op_nor_i64:
1378 return TCG_TARGET_HAS_nor_i64;
1379 case INDEX_op_clz_i64:
1380 return TCG_TARGET_HAS_clz_i64;
1381 case INDEX_op_ctz_i64:
1382 return TCG_TARGET_HAS_ctz_i64;
1383 case INDEX_op_ctpop_i64:
1384 return TCG_TARGET_HAS_ctpop_i64;
1385 case INDEX_op_add2_i64:
1386 return TCG_TARGET_HAS_add2_i64;
1387 case INDEX_op_sub2_i64:
1388 return TCG_TARGET_HAS_sub2_i64;
1389 case INDEX_op_mulu2_i64:
1390 return TCG_TARGET_HAS_mulu2_i64;
1391 case INDEX_op_muls2_i64:
1392 return TCG_TARGET_HAS_muls2_i64;
1393 case INDEX_op_muluh_i64:
1394 return TCG_TARGET_HAS_muluh_i64;
1395 case INDEX_op_mulsh_i64:
1396 return TCG_TARGET_HAS_mulsh_i64;
1397
1398 case INDEX_op_mov_vec:
1399 case INDEX_op_dup_vec:
1400 case INDEX_op_dupm_vec:
1401 case INDEX_op_ld_vec:
1402 case INDEX_op_st_vec:
1403 case INDEX_op_add_vec:
1404 case INDEX_op_sub_vec:
1405 case INDEX_op_and_vec:
1406 case INDEX_op_or_vec:
1407 case INDEX_op_xor_vec:
1408 case INDEX_op_cmp_vec:
1409 return have_vec;
1410 case INDEX_op_dup2_vec:
1411 return have_vec && TCG_TARGET_REG_BITS == 32;
1412 case INDEX_op_not_vec:
1413 return have_vec && TCG_TARGET_HAS_not_vec;
1414 case INDEX_op_neg_vec:
1415 return have_vec && TCG_TARGET_HAS_neg_vec;
1416 case INDEX_op_abs_vec:
1417 return have_vec && TCG_TARGET_HAS_abs_vec;
1418 case INDEX_op_andc_vec:
1419 return have_vec && TCG_TARGET_HAS_andc_vec;
1420 case INDEX_op_orc_vec:
1421 return have_vec && TCG_TARGET_HAS_orc_vec;
1422 case INDEX_op_nand_vec:
1423 return have_vec && TCG_TARGET_HAS_nand_vec;
1424 case INDEX_op_nor_vec:
1425 return have_vec && TCG_TARGET_HAS_nor_vec;
1426 case INDEX_op_eqv_vec:
1427 return have_vec && TCG_TARGET_HAS_eqv_vec;
1428 case INDEX_op_mul_vec:
1429 return have_vec && TCG_TARGET_HAS_mul_vec;
1430 case INDEX_op_shli_vec:
1431 case INDEX_op_shri_vec:
1432 case INDEX_op_sari_vec:
1433 return have_vec && TCG_TARGET_HAS_shi_vec;
1434 case INDEX_op_shls_vec:
1435 case INDEX_op_shrs_vec:
1436 case INDEX_op_sars_vec:
1437 return have_vec && TCG_TARGET_HAS_shs_vec;
1438 case INDEX_op_shlv_vec:
1439 case INDEX_op_shrv_vec:
1440 case INDEX_op_sarv_vec:
1441 return have_vec && TCG_TARGET_HAS_shv_vec;
1442 case INDEX_op_rotli_vec:
1443 return have_vec && TCG_TARGET_HAS_roti_vec;
1444 case INDEX_op_rotls_vec:
1445 return have_vec && TCG_TARGET_HAS_rots_vec;
1446 case INDEX_op_rotlv_vec:
1447 case INDEX_op_rotrv_vec:
1448 return have_vec && TCG_TARGET_HAS_rotv_vec;
1449 case INDEX_op_ssadd_vec:
1450 case INDEX_op_usadd_vec:
1451 case INDEX_op_sssub_vec:
1452 case INDEX_op_ussub_vec:
1453 return have_vec && TCG_TARGET_HAS_sat_vec;
1454 case INDEX_op_smin_vec:
1455 case INDEX_op_umin_vec:
1456 case INDEX_op_smax_vec:
1457 case INDEX_op_umax_vec:
1458 return have_vec && TCG_TARGET_HAS_minmax_vec;
1459 case INDEX_op_bitsel_vec:
1460 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1461 case INDEX_op_cmpsel_vec:
1462 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1463
1464 default:
1465 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1466 return true;
1467 }
1468 }
1469
1470 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1471 and endian swap. Maybe it would be better to do the alignment
1472 and endian swap in tcg_reg_alloc_call(). */
1473 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1474 {
1475 int i, real_args, nb_rets, pi;
1476 unsigned typemask;
1477 const TCGHelperInfo *info;
1478 TCGOp *op;
1479
1480 info = g_hash_table_lookup(helper_table, (gpointer)func);
1481 typemask = info->typemask;
1482
1483 #ifdef CONFIG_PLUGIN
1484 /* detect non-plugin helpers */
1485 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1486 tcg_ctx->plugin_insn->calls_helpers = true;
1487 }
1488 #endif
1489
1490 #if defined(__sparc__) && !defined(__arch64__) \
1491 && !defined(CONFIG_TCG_INTERPRETER)
1492 /* We have 64-bit values in one register, but need to pass as two
1493 separate parameters. Split them. */
1494 int orig_typemask = typemask;
1495 int orig_nargs = nargs;
1496 TCGv_i64 retl, reth;
1497 TCGTemp *split_args[MAX_OPC_PARAM];
1498
1499 retl = NULL;
1500 reth = NULL;
1501 typemask = 0;
1502 for (i = real_args = 0; i < nargs; ++i) {
1503 int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1504 bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1505
1506 if (is_64bit) {
1507 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1508 TCGv_i32 h = tcg_temp_new_i32();
1509 TCGv_i32 l = tcg_temp_new_i32();
1510 tcg_gen_extr_i64_i32(l, h, orig);
1511 split_args[real_args++] = tcgv_i32_temp(h);
1512 typemask |= dh_typecode_i32 << (real_args * 3);
1513 split_args[real_args++] = tcgv_i32_temp(l);
1514 typemask |= dh_typecode_i32 << (real_args * 3);
1515 } else {
1516 split_args[real_args++] = args[i];
1517 typemask |= argtype << (real_args * 3);
1518 }
1519 }
1520 nargs = real_args;
1521 args = split_args;
1522 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1523 for (i = 0; i < nargs; ++i) {
1524 int argtype = extract32(typemask, (i + 1) * 3, 3);
1525 bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1526 bool is_signed = argtype & 1;
1527
1528 if (is_32bit) {
1529 TCGv_i64 temp = tcg_temp_new_i64();
1530 TCGv_i32 orig = temp_tcgv_i32(args[i]);
1531 if (is_signed) {
1532 tcg_gen_ext_i32_i64(temp, orig);
1533 } else {
1534 tcg_gen_extu_i32_i64(temp, orig);
1535 }
1536 args[i] = tcgv_i64_temp(temp);
1537 }
1538 }
1539 #endif /* TCG_TARGET_EXTEND_ARGS */
1540
1541 op = tcg_emit_op(INDEX_op_call);
1542
1543 pi = 0;
1544 if (ret != NULL) {
1545 #if defined(__sparc__) && !defined(__arch64__) \
1546 && !defined(CONFIG_TCG_INTERPRETER)
1547 if ((typemask & 6) == dh_typecode_i64) {
1548 /* The 32-bit ABI is going to return the 64-bit value in
1549 the %o0/%o1 register pair. Prepare for this by using
1550 two return temporaries, and reassemble below. */
1551 retl = tcg_temp_new_i64();
1552 reth = tcg_temp_new_i64();
1553 op->args[pi++] = tcgv_i64_arg(reth);
1554 op->args[pi++] = tcgv_i64_arg(retl);
1555 nb_rets = 2;
1556 } else {
1557 op->args[pi++] = temp_arg(ret);
1558 nb_rets = 1;
1559 }
1560 #else
1561 if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1562 #if HOST_BIG_ENDIAN
1563 op->args[pi++] = temp_arg(ret + 1);
1564 op->args[pi++] = temp_arg(ret);
1565 #else
1566 op->args[pi++] = temp_arg(ret);
1567 op->args[pi++] = temp_arg(ret + 1);
1568 #endif
1569 nb_rets = 2;
1570 } else {
1571 op->args[pi++] = temp_arg(ret);
1572 nb_rets = 1;
1573 }
1574 #endif
1575 } else {
1576 nb_rets = 0;
1577 }
1578 TCGOP_CALLO(op) = nb_rets;
1579
1580 real_args = 0;
1581 for (i = 0; i < nargs; i++) {
1582 int argtype = extract32(typemask, (i + 1) * 3, 3);
1583 bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1584 bool want_align = false;
1585
1586 #if defined(CONFIG_TCG_INTERPRETER)
1587 /*
1588 * Align all arguments, so that they land in predictable places
1589 * for passing off to ffi_call.
1590 */
1591 want_align = true;
1592 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1593 /* Some targets want aligned 64 bit args */
1594 want_align = is_64bit;
1595 #endif
1596
1597 if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1598 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1599 real_args++;
1600 }
1601
1602 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1603 /*
1604 * If stack grows up, then we will be placing successive
1605 * arguments at lower addresses, which means we need to
1606 * reverse the order compared to how we would normally
1607 * treat either big or little-endian. For those arguments
1608 * that will wind up in registers, this still works for
1609 * HPPA (the only current STACK_GROWSUP target) since the
1610 * argument registers are *also* allocated in decreasing
1611 * order. If another such target is added, this logic may
1612 * have to get more complicated to differentiate between
1613 * stack arguments and register arguments.
1614 */
1615 #if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
1616 op->args[pi++] = temp_arg(args[i] + 1);
1617 op->args[pi++] = temp_arg(args[i]);
1618 #else
1619 op->args[pi++] = temp_arg(args[i]);
1620 op->args[pi++] = temp_arg(args[i] + 1);
1621 #endif
1622 real_args += 2;
1623 continue;
1624 }
1625
1626 op->args[pi++] = temp_arg(args[i]);
1627 real_args++;
1628 }
1629 op->args[pi++] = (uintptr_t)func;
1630 op->args[pi++] = (uintptr_t)info;
1631 TCGOP_CALLI(op) = real_args;
1632
1633 /* Make sure the fields didn't overflow. */
1634 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1635 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1636
1637 #if defined(__sparc__) && !defined(__arch64__) \
1638 && !defined(CONFIG_TCG_INTERPRETER)
1639 /* Free all of the parts we allocated above. */
1640 for (i = real_args = 0; i < orig_nargs; ++i) {
1641 int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1642 bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1643
1644 if (is_64bit) {
1645 tcg_temp_free_internal(args[real_args++]);
1646 tcg_temp_free_internal(args[real_args++]);
1647 } else {
1648 real_args++;
1649 }
1650 }
1651 if ((orig_typemask & 6) == dh_typecode_i64) {
1652 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1653 Note that describing these as TCGv_i64 eliminates an unnecessary
1654 zero-extension that tcg_gen_concat_i32_i64 would create. */
1655 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1656 tcg_temp_free_i64(retl);
1657 tcg_temp_free_i64(reth);
1658 }
1659 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1660 for (i = 0; i < nargs; ++i) {
1661 int argtype = extract32(typemask, (i + 1) * 3, 3);
1662 bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1663
1664 if (is_32bit) {
1665 tcg_temp_free_internal(args[i]);
1666 }
1667 }
1668 #endif /* TCG_TARGET_EXTEND_ARGS */
1669 }
1670
1671 static void tcg_reg_alloc_start(TCGContext *s)
1672 {
1673 int i, n;
1674
1675 for (i = 0, n = s->nb_temps; i < n; i++) {
1676 TCGTemp *ts = &s->temps[i];
1677 TCGTempVal val = TEMP_VAL_MEM;
1678
1679 switch (ts->kind) {
1680 case TEMP_CONST:
1681 val = TEMP_VAL_CONST;
1682 break;
1683 case TEMP_FIXED:
1684 val = TEMP_VAL_REG;
1685 break;
1686 case TEMP_GLOBAL:
1687 break;
1688 case TEMP_NORMAL:
1689 case TEMP_EBB:
1690 val = TEMP_VAL_DEAD;
1691 /* fall through */
1692 case TEMP_LOCAL:
1693 ts->mem_allocated = 0;
1694 break;
1695 default:
1696 g_assert_not_reached();
1697 }
1698 ts->val_type = val;
1699 }
1700
1701 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1702 }
1703
1704 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1705 TCGTemp *ts)
1706 {
1707 int idx = temp_idx(ts);
1708
1709 switch (ts->kind) {
1710 case TEMP_FIXED:
1711 case TEMP_GLOBAL:
1712 pstrcpy(buf, buf_size, ts->name);
1713 break;
1714 case TEMP_LOCAL:
1715 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1716 break;
1717 case TEMP_EBB:
1718 snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1719 break;
1720 case TEMP_NORMAL:
1721 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1722 break;
1723 case TEMP_CONST:
1724 switch (ts->type) {
1725 case TCG_TYPE_I32:
1726 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1727 break;
1728 #if TCG_TARGET_REG_BITS > 32
1729 case TCG_TYPE_I64:
1730 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1731 break;
1732 #endif
1733 case TCG_TYPE_V64:
1734 case TCG_TYPE_V128:
1735 case TCG_TYPE_V256:
1736 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1737 64 << (ts->type - TCG_TYPE_V64), ts->val);
1738 break;
1739 default:
1740 g_assert_not_reached();
1741 }
1742 break;
1743 }
1744 return buf;
1745 }
1746
1747 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1748 int buf_size, TCGArg arg)
1749 {
1750 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1751 }
1752
/*
 * Printable names of the comparison conditions, indexed by TCG_COND_*
 * value.  Indices without an initializer are NULL, so a lookup must
 * check both the range and the entry.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1768
1769 static const char * const ldst_name[] =
1770 {
1771 [MO_UB] = "ub",
1772 [MO_SB] = "sb",
1773 [MO_LEUW] = "leuw",
1774 [MO_LESW] = "lesw",
1775 [MO_LEUL] = "leul",
1776 [MO_LESL] = "lesl",
1777 [MO_LEUQ] = "leq",
1778 [MO_BEUW] = "beuw",
1779 [MO_BESW] = "besw",
1780 [MO_BEUL] = "beul",
1781 [MO_BESL] = "besl",
1782 [MO_BEUQ] = "beq",
1783 };
1784
/*
 * Printable prefixes for the alignment field of a MemOp, indexed by
 * (memop & MO_AMASK) >> MO_ASHIFT.  Which of MO_UNALN and MO_ALIGN gets
 * the empty prefix depends on TARGET_ALIGNED_ONLY.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1800
/*
 * Printable names of the bswap flag combinations, indexed by the OR of
 * TCG_BSWAP_* flags.  The elements are fixed-size char arrays, not
 * pointers, so an index without an initializer yields an empty string
 * rather than NULL.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1808
1809 static inline bool tcg_regset_single(TCGRegSet d)
1810 {
1811 return (d & (d - 1)) == 0;
1812 }
1813
1814 static inline TCGReg tcg_regset_first(TCGRegSet d)
1815 {
1816 if (TCG_TARGET_NB_REGS <= 32) {
1817 return ctz32(d);
1818 } else {
1819 return ctz64(d);
1820 }
1821 }
1822
/*
 * fprintf() that cannot report failure: evaluates to the number of
 * characters output, or to 0 if fprintf() returned an error.
 */
#define ne_fprintf(...) \
    ({ int nchars_ = fprintf(__VA_ARGS__); nchars_ < 0 ? 0 : nchars_; })
1826
1827 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1828 {
1829 char buf[128];
1830 TCGOp *op;
1831
1832 QTAILQ_FOREACH(op, &s->ops, link) {
1833 int i, k, nb_oargs, nb_iargs, nb_cargs;
1834 const TCGOpDef *def;
1835 TCGOpcode c;
1836 int col = 0;
1837
1838 c = op->opc;
1839 def = &tcg_op_defs[c];
1840
1841 if (c == INDEX_op_insn_start) {
1842 nb_oargs = 0;
1843 col += ne_fprintf(f, "\n ----");
1844
1845 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1846 target_ulong a;
1847 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1848 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1849 #else
1850 a = op->args[i];
1851 #endif
1852 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1853 }
1854 } else if (c == INDEX_op_call) {
1855 const TCGHelperInfo *info = tcg_call_info(op);
1856 void *func = tcg_call_func(op);
1857
1858 /* variable number of arguments */
1859 nb_oargs = TCGOP_CALLO(op);
1860 nb_iargs = TCGOP_CALLI(op);
1861 nb_cargs = def->nb_cargs;
1862
1863 col += ne_fprintf(f, " %s ", def->name);
1864
1865 /*
1866 * Print the function name from TCGHelperInfo, if available.
1867 * Note that plugins have a template function for the info,
1868 * but the actual function pointer comes from the plugin.
1869 */
1870 if (func == info->func) {
1871 col += ne_fprintf(f, "%s", info->name);
1872 } else {
1873 col += ne_fprintf(f, "plugin(%p)", func);
1874 }
1875
1876 col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1877 for (i = 0; i < nb_oargs; i++) {
1878 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1879 op->args[i]));
1880 }
1881 for (i = 0; i < nb_iargs; i++) {
1882 TCGArg arg = op->args[nb_oargs + i];
1883 const char *t = "<dummy>";
1884 if (arg != TCG_CALL_DUMMY_ARG) {
1885 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1886 }
1887 col += ne_fprintf(f, ",%s", t);
1888 }
1889 } else {
1890 col += ne_fprintf(f, " %s ", def->name);
1891
1892 nb_oargs = def->nb_oargs;
1893 nb_iargs = def->nb_iargs;
1894 nb_cargs = def->nb_cargs;
1895
1896 if (def->flags & TCG_OPF_VECTOR) {
1897 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1898 8 << TCGOP_VECE(op));
1899 }
1900
1901 k = 0;
1902 for (i = 0; i < nb_oargs; i++) {
1903 const char *sep = k ? "," : "";
1904 col += ne_fprintf(f, "%s%s", sep,
1905 tcg_get_arg_str(s, buf, sizeof(buf),
1906 op->args[k++]));
1907 }
1908 for (i = 0; i < nb_iargs; i++) {
1909 const char *sep = k ? "," : "";
1910 col += ne_fprintf(f, "%s%s", sep,
1911 tcg_get_arg_str(s, buf, sizeof(buf),
1912 op->args[k++]));
1913 }
1914 switch (c) {
1915 case INDEX_op_brcond_i32:
1916 case INDEX_op_setcond_i32:
1917 case INDEX_op_movcond_i32:
1918 case INDEX_op_brcond2_i32:
1919 case INDEX_op_setcond2_i32:
1920 case INDEX_op_brcond_i64:
1921 case INDEX_op_setcond_i64:
1922 case INDEX_op_movcond_i64:
1923 case INDEX_op_cmp_vec:
1924 case INDEX_op_cmpsel_vec:
1925 if (op->args[k] < ARRAY_SIZE(cond_name)
1926 && cond_name[op->args[k]]) {
1927 col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1928 } else {
1929 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1930 }
1931 i = 1;
1932 break;
1933 case INDEX_op_qemu_ld_i32:
1934 case INDEX_op_qemu_st_i32:
1935 case INDEX_op_qemu_st8_i32:
1936 case INDEX_op_qemu_ld_i64:
1937 case INDEX_op_qemu_st_i64:
1938 {
1939 MemOpIdx oi = op->args[k++];
1940 MemOp op = get_memop(oi);
1941 unsigned ix = get_mmuidx(oi);
1942
1943 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1944 col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1945 } else {
1946 const char *s_al, *s_op;
1947 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1948 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1949 col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1950 }
1951 i = 1;
1952 }
1953 break;
1954 case INDEX_op_bswap16_i32:
1955 case INDEX_op_bswap16_i64:
1956 case INDEX_op_bswap32_i32:
1957 case INDEX_op_bswap32_i64:
1958 case INDEX_op_bswap64_i64:
1959 {
1960 TCGArg flags = op->args[k];
1961 const char *name = NULL;
1962
1963 if (flags < ARRAY_SIZE(bswap_flag_name)) {
1964 name = bswap_flag_name[flags];
1965 }
1966 if (name) {
1967 col += ne_fprintf(f, ",%s", name);
1968 } else {
1969 col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1970 }
1971 i = k = 1;
1972 }
1973 break;
1974 default:
1975 i = 0;
1976 break;
1977 }
1978 switch (c) {
1979 case INDEX_op_set_label:
1980 case INDEX_op_br:
1981 case INDEX_op_brcond_i32:
1982 case INDEX_op_brcond_i64:
1983 case INDEX_op_brcond2_i32:
1984 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1985 arg_label(op->args[k])->id);
1986 i++, k++;
1987 break;
1988 default:
1989 break;
1990 }
1991 for (; i < nb_cargs; i++, k++) {
1992 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1993 op->args[k]);
1994 }
1995 }
1996
1997 if (have_prefs || op->life) {
1998 for (; col < 40; ++col) {
1999 putc(' ', f);
2000 }
2001 }
2002
2003 if (op->life) {
2004 unsigned life = op->life;
2005
2006 if (life & (SYNC_ARG * 3)) {
2007 ne_fprintf(f, " sync:");
2008 for (i = 0; i < 2; ++i) {
2009 if (life & (SYNC_ARG << i)) {
2010 ne_fprintf(f, " %d", i);
2011 }
2012 }
2013 }
2014 life /= DEAD_ARG;
2015 if (life) {
2016 ne_fprintf(f, " dead:");
2017 for (i = 0; life; ++i, life >>= 1) {
2018 if (life & 1) {
2019 ne_fprintf(f, " %d", i);
2020 }
2021 }
2022 }
2023 }
2024
2025 if (have_prefs) {
2026 for (i = 0; i < nb_oargs; ++i) {
2027 TCGRegSet set = op->output_pref[i];
2028
2029 if (i == 0) {
2030 ne_fprintf(f, " pref=");
2031 } else {
2032 ne_fprintf(f, ",");
2033 }
2034 if (set == 0) {
2035 ne_fprintf(f, "none");
2036 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2037 ne_fprintf(f, "all");
2038 #ifdef CONFIG_DEBUG_TCG
2039 } else if (tcg_regset_single(set)) {
2040 TCGReg reg = tcg_regset_first(set);
2041 ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2042 #endif
2043 } else if (TCG_TARGET_NB_REGS <= 32) {
2044 ne_fprintf(f, "0x%x", (uint32_t)set);
2045 } else {
2046 ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2047 }
2048 }
2049 }
2050
2051 putc('\n', f);
2052 }
2053 }
2054
2055 /* we give more priority to constraints with less registers */
2056 static int get_constraint_priority(const TCGOpDef *def, int k)
2057 {
2058 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2059 int n;
2060
2061 if (arg_ct->oalias) {
2062 /* an alias is equivalent to a single register */
2063 n = 1;
2064 } else {
2065 n = ctpop64(arg_ct->regs);
2066 }
2067 return TCG_TARGET_NB_REGS - n + 1;
2068 }
2069
2070 /* sort from highest priority to lowest */
2071 static void sort_constraints(TCGOpDef *def, int start, int n)
2072 {
2073 int i, j;
2074 TCGArgConstraint *a = def->args_ct;
2075
2076 for (i = 0; i < n; i++) {
2077 a[start + i].sort_index = start + i;
2078 }
2079 if (n <= 1) {
2080 return;
2081 }
2082 for (i = 0; i < n - 1; i++) {
2083 for (j = i + 1; j < n; j++) {
2084 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2085 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2086 if (p1 < p2) {
2087 int tmp = a[start + i].sort_index;
2088 a[start + i].sort_index = a[start + j].sort_index;
2089 a[start + j].sort_index = tmp;
2090 }
2091 }
2092 }
2093 }
2094
/*
 * Fill in the argument constraints of every present opcode in tcg_op_defs
 * by parsing the constraint strings of the set that tcg_target_op_def()
 * selects for it, then sort the output and the input constraints.
 *
 * The parameter @s is not used by this function.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Opcodes without input or output arguments have no constraints. */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Each character of the string adds one constraint. */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /*
                         * A digit aliases this input to that output.
                         * It must be the first character of the string.
                         */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

                /*
                 * The included header is expanded into one case label
                 * per CONST() and REGS() entry, using the two macros
                 * defined here.
                 */
#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2181
2182 void tcg_op_remove(TCGContext *s, TCGOp *op)
2183 {
2184 TCGLabel *label;
2185
2186 switch (op->opc) {
2187 case INDEX_op_br:
2188 label = arg_label(op->args[0]);
2189 label->refs--;
2190 break;
2191 case INDEX_op_brcond_i32:
2192 case INDEX_op_brcond_i64:
2193 label = arg_label(op->args[3]);
2194 label->refs--;
2195 break;
2196 case INDEX_op_brcond2_i32:
2197 label = arg_label(op->args[5]);
2198 label->refs--;
2199 break;
2200 default:
2201 break;
2202 }
2203
2204 QTAILQ_REMOVE(&s->ops, op, link);
2205 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2206 s->nb_ops--;
2207
2208 #ifdef CONFIG_PROFILER
2209 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2210 #endif
2211 }
2212
2213 void tcg_remove_ops_after(TCGOp *op)
2214 {
2215 TCGContext *s = tcg_ctx;
2216
2217 while (true) {
2218 TCGOp *last = tcg_last_op();
2219 if (last == op) {
2220 return;
2221 }
2222 tcg_op_remove(s, last);
2223 }
2224 }
2225
2226 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2227 {
2228 TCGContext *s = tcg_ctx;
2229 TCGOp *op;
2230
2231 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2232 op = tcg_malloc(sizeof(TCGOp));
2233 } else {
2234 op = QTAILQ_FIRST(&s->free_ops);
2235 QTAILQ_REMOVE(&s->free_ops, op, link);
2236 }
2237 memset(op, 0, offsetof(TCGOp, link));
2238 op->opc = opc;
2239 s->nb_ops++;
2240
2241 return op;
2242 }
2243
2244 TCGOp *tcg_emit_op(TCGOpcode opc)
2245 {
2246 TCGOp *op = tcg_op_alloc(opc);
2247 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2248 return op;
2249 }
2250
2251 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2252 {
2253 TCGOp *new_op = tcg_op_alloc(opc);
2254 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2255 return new_op;
2256 }
2257
2258 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2259 {
2260 TCGOp *new_op = tcg_op_alloc(opc);
2261 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2262 return new_op;
2263 }
2264
2265 /* Reachable analysis : remove unreachable code. */
2266 static void reachable_code_pass(TCGContext *s)
2267 {
2268 TCGOp *op, *op_next;
2269 bool dead = false;
2270
2271 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2272 bool remove = dead;
2273 TCGLabel *label;
2274
2275 switch (op->opc) {
2276 case INDEX_op_set_label:
2277 label = arg_label(op->args[0]);
2278 if (label->refs == 0) {
2279 /*
2280 * While there is an occasional backward branch, virtually
2281 * all branches generated by the translators are forward.
2282 * Which means that generally we will have already removed
2283 * all references to the label that will be, and there is
2284 * little to be gained by iterating.
2285 */
2286 remove = true;
2287 } else {
2288 /* Once we see a label, insns become live again. */
2289 dead = false;
2290 remove = false;
2291
2292 /*
2293 * Optimization can fold conditional branches to unconditional.
2294 * If we find a label with one reference which is preceded by
2295 * an unconditional branch to it, remove both. This needed to
2296 * wait until the dead code in between them was removed.
2297 */
2298 if (label->refs == 1) {
2299 TCGOp *op_prev = QTAILQ_PREV(op, link);
2300 if (op_prev->opc == INDEX_op_br &&
2301 label == arg_label(op_prev->args[0])) {
2302 tcg_op_remove(s, op_prev);
2303 remove = true;
2304 }
2305 }
2306 }
2307 break;
2308
2309 case INDEX_op_br:
2310 case INDEX_op_exit_tb:
2311 case INDEX_op_goto_ptr:
2312 /* Unconditional branches; everything following is dead. */
2313 dead = true;
2314 break;
2315
2316 case INDEX_op_call:
2317 /* Notice noreturn helper calls, raising exceptions. */
2318 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2319 dead = true;
2320 }
2321 break;
2322
2323 case INDEX_op_insn_start:
2324 /* Never remove -- we need to keep these for unwind. */
2325 remove = false;
2326 break;
2327
2328 default:
2329 break;
2330 }
2331
2332 if (remove) {
2333 tcg_op_remove(s, op);
2334 }
2335 }
2336 }
2337
/*
 * Bits kept in TCGTemp.state by the liveness passes: TS_DEAD marks a temp
 * as dead, TS_MEM marks it as synced to (or required in) memory.
 */
#define TS_DEAD 1
#define TS_MEM 2

/*
 * Test the dead / needs-sync bit of argument @n.  Both macros expand to a
 * reference to a variable named 'arg_life' in the enclosing scope.
 */
#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2343
2344 /* For liveness_pass_1, the register preferences for a given temp. */
2345 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2346 {
2347 return ts->state_ptr;
2348 }
2349
2350 /* For liveness_pass_1, reset the preferences for a given temp to the
2351 * maximal regset for its type.
2352 */
2353 static inline void la_reset_pref(TCGTemp *ts)
2354 {
2355 *la_temp_pref(ts)
2356 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2357 }
2358
2359 /* liveness analysis: end of function: all temps are dead, and globals
2360 should be in memory. */
2361 static void la_func_end(TCGContext *s, int ng, int nt)
2362 {
2363 int i;
2364
2365 for (i = 0; i < ng; ++i) {
2366 s->temps[i].state = TS_DEAD | TS_MEM;
2367 la_reset_pref(&s->temps[i]);
2368 }
2369 for (i = ng; i < nt; ++i) {
2370 s->temps[i].state = TS_DEAD;
2371 la_reset_pref(&s->temps[i]);
2372 }
2373 }
2374
2375 /* liveness analysis: end of basic block: all temps are dead, globals
2376 and local temps should be in memory. */
2377 static void la_bb_end(TCGContext *s, int ng, int nt)
2378 {
2379 int i;
2380
2381 for (i = 0; i < nt; ++i) {
2382 TCGTemp *ts = &s->temps[i];
2383 int state;
2384
2385 switch (ts->kind) {
2386 case TEMP_FIXED:
2387 case TEMP_GLOBAL:
2388 case TEMP_LOCAL:
2389 state = TS_DEAD | TS_MEM;
2390 break;
2391 case TEMP_NORMAL:
2392 case TEMP_EBB:
2393 case TEMP_CONST:
2394 state = TS_DEAD;
2395 break;
2396 default:
2397 g_assert_not_reached();
2398 }
2399 ts->state = state;
2400 la_reset_pref(ts);
2401 }
2402 }
2403
2404 /* liveness analysis: sync globals back to memory. */
2405 static void la_global_sync(TCGContext *s, int ng)
2406 {
2407 int i;
2408
2409 for (i = 0; i < ng; ++i) {
2410 int state = s->temps[i].state;
2411 s->temps[i].state = state | TS_MEM;
2412 if (state == TS_DEAD) {
2413 /* If the global was previously dead, reset prefs. */
2414 la_reset_pref(&s->temps[i]);
2415 }
2416 }
2417 }
2418
2419 /*
2420 * liveness analysis: conditional branch: all temps are dead unless
2421 * explicitly live-across-conditional-branch, globals and local temps
2422 * should be synced.
2423 */
2424 static void la_bb_sync(TCGContext *s, int ng, int nt)
2425 {
2426 la_global_sync(s, ng);
2427
2428 for (int i = ng; i < nt; ++i) {
2429 TCGTemp *ts = &s->temps[i];
2430 int state;
2431
2432 switch (ts->kind) {
2433 case TEMP_LOCAL:
2434 state = ts->state;
2435 ts->state = state | TS_MEM;
2436 if (state != TS_DEAD) {
2437 continue;
2438 }
2439 break;
2440 case TEMP_NORMAL:
2441 s->temps[i].state = TS_DEAD;
2442 break;
2443 case TEMP_EBB:
2444 case TEMP_CONST:
2445 continue;
2446 default:
2447 g_assert_not_reached();
2448 }
2449 la_reset_pref(&s->temps[i]);
2450 }
2451 }
2452
2453 /* liveness analysis: sync globals back to memory and kill. */
2454 static void la_global_kill(TCGContext *s, int ng)
2455 {
2456 int i;
2457
2458 for (i = 0; i < ng; i++) {
2459 s->temps[i].state = TS_DEAD | TS_MEM;
2460 la_reset_pref(&s->temps[i]);
2461 }
2462 }
2463
2464 /* liveness analysis: note live globals crossing calls. */
2465 static void la_cross_call(TCGContext *s, int nt)
2466 {
2467 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2468 int i;
2469
2470 for (i = 0; i < nt; i++) {
2471 TCGTemp *ts = &s->temps[i];
2472 if (!(ts->state & TS_DEAD)) {
2473 TCGRegSet *pset = la_temp_pref(ts);
2474 TCGRegSet set = *pset;
2475
2476 set &= mask;
2477 /* If the combination is not possible, restart. */
2478 if (set == 0) {
2479 set = tcg_target_available_regs[ts->type] & mask;
2480 }
2481 *pset = set;
2482 }
2483 }
2484 }
2485
2486 /* Liveness analysis : update the opc_arg_life array to tell if a
2487 given input arguments is dead. Instructions updating dead
2488 temporaries are removed. */
2489 static void liveness_pass_1(TCGContext *s)
2490 {
2491 int nb_globals = s->nb_globals;
2492 int nb_temps = s->nb_temps;
2493 TCGOp *op, *op_prev;
2494 TCGRegSet *prefs;
2495 int i;
2496
2497 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2498 for (i = 0; i < nb_temps; ++i) {
2499 s->temps[i].state_ptr = prefs + i;
2500 }
2501
2502 /* ??? Should be redundant with the exit_tb that ends the TB. */
2503 la_func_end(s, nb_globals, nb_temps);
2504
2505 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2506 int nb_iargs, nb_oargs;
2507 TCGOpcode opc_new, opc_new2;
2508 bool have_opc_new2;
2509 TCGLifeData arg_life = 0;
2510 TCGTemp *ts;
2511 TCGOpcode opc = op->opc;
2512 const TCGOpDef *def = &tcg_op_defs[opc];
2513
2514 switch (opc) {
2515 case INDEX_op_call:
2516 {
2517 int call_flags;
2518 int nb_call_regs;
2519
2520 nb_oargs = TCGOP_CALLO(op);
2521 nb_iargs = TCGOP_CALLI(op);
2522 call_flags = tcg_call_flags(op);
2523
2524 /* pure functions can be removed if their result is unused */
2525 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2526 for (i = 0; i < nb_oargs; i++) {
2527 ts = arg_temp(op->args[i]);
2528 if (ts->state != TS_DEAD) {
2529 goto do_not_remove_call;
2530 }
2531 }
2532 goto do_remove;
2533 }
2534 do_not_remove_call:
2535
2536 /* Output args are dead. */
2537 for (i = 0; i < nb_oargs; i++) {
2538 ts = arg_temp(op->args[i]);
2539 if (ts->state & TS_DEAD) {
2540 arg_life |= DEAD_ARG << i;
2541 }
2542 if (ts->state & TS_MEM) {
2543 arg_life |= SYNC_ARG << i;
2544 }
2545 ts->state = TS_DEAD;
2546 la_reset_pref(ts);
2547
2548 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2549 op->output_pref[i] = 0;
2550 }
2551
2552 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2553 TCG_CALL_NO_READ_GLOBALS))) {
2554 la_global_kill(s, nb_globals);
2555 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2556 la_global_sync(s, nb_globals);
2557 }
2558
2559 /* Record arguments that die in this helper. */
2560 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2561 ts = arg_temp(op->args[i]);
2562 if (ts && ts->state & TS_DEAD) {
2563 arg_life |= DEAD_ARG << i;
2564 }
2565 }
2566
2567 /* For all live registers, remove call-clobbered prefs. */
2568 la_cross_call(s, nb_temps);
2569
2570 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2571
2572 /* Input arguments are live for preceding opcodes. */
2573 for (i = 0; i < nb_iargs; i++) {
2574 ts = arg_temp(op->args[i + nb_oargs]);
2575 if (ts && ts->state & TS_DEAD) {
2576 /* For those arguments that die, and will be allocated
2577 * in registers, clear the register set for that arg,
2578 * to be filled in below. For args that will be on
2579 * the stack, reset to any available reg.
2580 */
2581 *la_temp_pref(ts)
2582 = (i < nb_call_regs ? 0 :
2583 tcg_target_available_regs[ts->type]);
2584 ts->state &= ~TS_DEAD;
2585 }
2586 }
2587
2588 /* For each input argument, add its input register to prefs.
2589 If a temp is used once, this produces a single set bit. */
2590 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2591 ts = arg_temp(op->args[i + nb_oargs]);
2592 if (ts) {
2593 tcg_regset_set_reg(*la_temp_pref(ts),
2594 tcg_target_call_iarg_regs[i]);
2595 }
2596 }
2597 }
2598 break;
2599 case INDEX_op_insn_start:
2600 break;
2601 case INDEX_op_discard:
2602 /* mark the temporary as dead */
2603 ts = arg_temp(op->args[0]);
2604 ts->state = TS_DEAD;
2605 la_reset_pref(ts);
2606 break;
2607
2608 case INDEX_op_add2_i32:
2609 opc_new = INDEX_op_add_i32;
2610 goto do_addsub2;
2611 case INDEX_op_sub2_i32:
2612 opc_new = INDEX_op_sub_i32;
2613 goto do_addsub2;
2614 case INDEX_op_add2_i64:
2615 opc_new = INDEX_op_add_i64;
2616 goto do_addsub2;
2617 case INDEX_op_sub2_i64:
2618 opc_new = INDEX_op_sub_i64;
2619 do_addsub2:
2620 nb_iargs = 4;
2621 nb_oargs = 2;
2622 /* Test if the high part of the operation is dead, but not
2623 the low part. The result can be optimized to a simple
2624 add or sub. This happens often for x86_64 guest when the
2625 cpu mode is set to 32 bit. */
2626 if (arg_temp(op->args[1])->state == TS_DEAD) {
2627 if (arg_temp(op->args[0])->state == TS_DEAD) {
2628 goto do_remove;
2629 }
2630 /* Replace the opcode and adjust the args in place,
2631 leaving 3 unused args at the end. */
2632 op->opc = opc = opc_new;
2633 op->args[1] = op->args[2];
2634 op->args[2] = op->args[4];
2635 /* Fall through and mark the single-word operation live. */
2636 nb_iargs = 2;
2637 nb_oargs = 1;
2638 }
2639 goto do_not_remove;
2640
2641 case INDEX_op_mulu2_i32:
2642 opc_new = INDEX_op_mul_i32;
2643 opc_new2 = INDEX_op_muluh_i32;
2644 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2645 goto do_mul2;
2646 case INDEX_op_muls2_i32:
2647 opc_new = INDEX_op_mul_i32;
2648 opc_new2 = INDEX_op_mulsh_i32;
2649 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2650 goto do_mul2;
2651 case INDEX_op_mulu2_i64:
2652 opc_new = INDEX_op_mul_i64;
2653 opc_new2 = INDEX_op_muluh_i64;
2654 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2655 goto do_mul2;
2656 case INDEX_op_muls2_i64:
2657 opc_new = INDEX_op_mul_i64;
2658 opc_new2 = INDEX_op_mulsh_i64;
2659 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2660 goto do_mul2;
2661 do_mul2:
2662 nb_iargs = 2;
2663 nb_oargs = 2;
2664 if (arg_temp(op->args[1])->state == TS_DEAD) {
2665 if (arg_temp(op->args[0])->state == TS_DEAD) {
2666 /* Both parts of the operation are dead. */
2667 goto do_remove;
2668 }
2669 /* The high part of the operation is dead; generate the low. */
2670 op->opc = opc = opc_new;
2671 op->args[1] = op->args[2];
2672 op->args[2] = op->args[3];
2673 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2674 /* The low part of the operation is dead; generate the high. */
2675 op->opc = opc = opc_new2;
2676 op->args[0] = op->args[1];
2677 op->args[1] = op->args[2];
2678 op->args[2] = op->args[3];
2679 } else {
2680 goto do_not_remove;
2681 }
2682 /* Mark the single-word operation live. */
2683 nb_oargs = 1;
2684 goto do_not_remove;
2685
2686 default:
2687 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2688 nb_iargs = def->nb_iargs;
2689 nb_oargs = def->nb_oargs;
2690
2691 /* Test if the operation can be removed because all
2692 its outputs are dead. We assume that nb_oargs == 0
2693 implies side effects */
2694 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2695 for (i = 0; i < nb_oargs; i++) {
2696 if (arg_temp(op->args[i])->state != TS_DEAD) {
2697 goto do_not_remove;
2698 }
2699 }
2700 goto do_remove;
2701 }
2702 goto do_not_remove;
2703
2704 do_remove:
2705 tcg_op_remove(s, op);
2706 break;
2707
2708 do_not_remove:
2709 for (i = 0; i < nb_oargs; i++) {
2710 ts = arg_temp(op->args[i]);
2711
2712 /* Remember the preference of the uses that followed. */
2713 op->output_pref[i] = *la_temp_pref(ts);
2714
2715 /* Output args are dead. */
2716 if (ts->state & TS_DEAD) {
2717 arg_life |= DEAD_ARG << i;
2718 }
2719 if (ts->state & TS_MEM) {
2720 arg_life |= SYNC_ARG << i;
2721 }
2722 ts->state = TS_DEAD;
2723 la_reset_pref(ts);
2724 }
2725
2726 /* If end of basic block, update. */
2727 if (def->flags & TCG_OPF_BB_EXIT) {
2728 la_func_end(s, nb_globals, nb_temps);
2729 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2730 la_bb_sync(s, nb_globals, nb_temps);
2731 } else if (def->flags & TCG_OPF_BB_END) {
2732 la_bb_end(s, nb_globals, nb_temps);
2733 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2734 la_global_sync(s, nb_globals);
2735 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2736 la_cross_call(s, nb_temps);
2737 }
2738 }
2739
2740 /* Record arguments that die in this opcode. */
2741 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2742 ts = arg_temp(op->args[i]);
2743 if (ts->state & TS_DEAD) {
2744 arg_life |= DEAD_ARG << i;
2745 }
2746 }
2747
2748 /* Input arguments are live for preceding opcodes. */
2749 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2750 ts = arg_temp(op->args[i]);
2751 if (ts->state & TS_DEAD) {
2752 /* For operands that were dead, initially allow
2753 all regs for the type. */
2754 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2755 ts->state &= ~TS_DEAD;
2756 }
2757 }
2758
2759 /* Incorporate constraints for this operand. */
2760 switch (opc) {
2761 case INDEX_op_mov_i32:
2762 case INDEX_op_mov_i64:
2763 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2764 have proper constraints. That said, special case
2765 moves to propagate preferences backward. */
2766 if (IS_DEAD_ARG(1)) {
2767 *la_temp_pref(arg_temp(op->args[0]))
2768 = *la_temp_pref(arg_temp(op->args[1]));
2769 }
2770 break;
2771
2772 default:
2773 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2774 const TCGArgConstraint *ct = &def->args_ct[i];
2775 TCGRegSet set, *pset;
2776
2777 ts = arg_temp(op->args[i]);
2778 pset = la_temp_pref(ts);
2779 set = *pset;
2780
2781 set &= ct->regs;
2782 if (ct->ialias) {
2783 set &= op->output_pref[ct->alias_index];
2784 }
2785 /* If the combination is not possible, restart. */
2786 if (set == 0) {
2787 set = ct->regs;
2788 }
2789 *pset = set;
2790 }
2791 break;
2792 }
2793 break;
2794 }
2795 op->life = arg_life;
2796 }
2797 }
2798
2799 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2800 static bool liveness_pass_2(TCGContext *s)
2801 {
2802 int nb_globals = s->nb_globals;
2803 int nb_temps, i;
2804 bool changes = false;
2805 TCGOp *op, *op_next;
2806
2807 /* Create a temporary for each indirect global. */
2808 for (i = 0; i < nb_globals; ++i) {
2809 TCGTemp *its = &s->temps[i];
2810 if (its->indirect_reg) {
2811 TCGTemp *dts = tcg_temp_alloc(s);
2812 dts->type = its->type;
2813 dts->base_type = its->base_type;
2814 dts->kind = TEMP_EBB;
2815 its->state_ptr = dts;
2816 } else {
2817 its->state_ptr = NULL;
2818 }
2819 /* All globals begin dead. */
2820 its->state = TS_DEAD;
2821 }
2822 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2823 TCGTemp *its = &s->temps[i];
2824 its->state_ptr = NULL;
2825 its->state = TS_DEAD;
2826 }
2827
2828 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2829 TCGOpcode opc = op->opc;
2830 const TCGOpDef *def = &tcg_op_defs[opc];
2831 TCGLifeData arg_life = op->life;
2832 int nb_iargs, nb_oargs, call_flags;
2833 TCGTemp *arg_ts, *dir_ts;
2834
2835 if (opc == INDEX_op_call) {
2836 nb_oargs = TCGOP_CALLO(op);
2837 nb_iargs = TCGOP_CALLI(op);
2838 call_flags = tcg_call_flags(op);
2839 } else {
2840 nb_iargs = def->nb_iargs;
2841 nb_oargs = def->nb_oargs;
2842
2843 /* Set flags similar to how calls require. */
2844 if (def->flags & TCG_OPF_COND_BRANCH) {
2845 /* Like reading globals: sync_globals */
2846 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2847 } else if (def->flags & TCG_OPF_BB_END) {
2848 /* Like writing globals: save_globals */
2849 call_flags = 0;
2850 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2851 /* Like reading globals: sync_globals */
2852 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2853 } else {
2854 /* No effect on globals. */
2855 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2856 TCG_CALL_NO_WRITE_GLOBALS);
2857 }
2858 }
2859
2860 /* Make sure that input arguments are available. */
2861 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2862 arg_ts = arg_temp(op->args[i]);
2863 if (arg_ts) {
2864 dir_ts = arg_ts->state_ptr;
2865 if (dir_ts && arg_ts->state == TS_DEAD) {
2866 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2867 ? INDEX_op_ld_i32
2868 : INDEX_op_ld_i64);
2869 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2870
2871 lop->args[0] = temp_arg(dir_ts);
2872 lop->args[1] = temp_arg(arg_ts->mem_base);
2873 lop->args[2] = arg_ts->mem_offset;
2874
2875 /* Loaded, but synced with memory. */
2876 arg_ts->state = TS_MEM;
2877 }
2878 }
2879 }
2880
2881 /* Perform input replacement, and mark inputs that became dead.
2882 No action is required except keeping temp_state up to date
2883 so that we reload when needed. */
2884 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2885 arg_ts = arg_temp(op->args[i]);
2886 if (arg_ts) {
2887 dir_ts = arg_ts->state_ptr;
2888 if (dir_ts) {
2889 op->args[i] = temp_arg(dir_ts);
2890 changes = true;
2891 if (IS_DEAD_ARG(i)) {
2892 arg_ts->state = TS_DEAD;
2893 }
2894 }
2895 }
2896 }
2897
2898 /* Liveness analysis should ensure that the following are
2899 all correct, for call sites and basic block end points. */
2900 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2901 /* Nothing to do */
2902 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2903 for (i = 0; i < nb_globals; ++i) {
2904 /* Liveness should see that globals are synced back,
2905 that is, either TS_DEAD or TS_MEM. */
2906 arg_ts = &s->temps[i];
2907 tcg_debug_assert(arg_ts->state_ptr == 0
2908 || arg_ts->state != 0);
2909 }
2910 } else {
2911 for (i = 0; i < nb_globals; ++i) {
2912 /* Liveness should see that globals are saved back,
2913 that is, TS_DEAD, waiting to be reloaded. */
2914 arg_ts = &s->temps[i];
2915 tcg_debug_assert(arg_ts->state_ptr == 0
2916 || arg_ts->state == TS_DEAD);
2917 }
2918 }
2919
2920 /* Outputs become available. */
2921 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2922 arg_ts = arg_temp(op->args[0]);
2923 dir_ts = arg_ts->state_ptr;
2924 if (dir_ts) {
2925 op->args[0] = temp_arg(dir_ts);
2926 changes = true;
2927
2928 /* The output is now live and modified. */
2929 arg_ts->state = 0;
2930
2931 if (NEED_SYNC_ARG(0)) {
2932 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2933 ? INDEX_op_st_i32
2934 : INDEX_op_st_i64);
2935 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2936 TCGTemp *out_ts = dir_ts;
2937
2938 if (IS_DEAD_ARG(0)) {
2939 out_ts = arg_temp(op->args[1]);
2940 arg_ts->state = TS_DEAD;
2941 tcg_op_remove(s, op);
2942 } else {
2943 arg_ts->state = TS_MEM;
2944 }
2945
2946 sop->args[0] = temp_arg(out_ts);
2947 sop->args[1] = temp_arg(arg_ts->mem_base);
2948 sop->args[2] = arg_ts->mem_offset;
2949 } else {
2950 tcg_debug_assert(!IS_DEAD_ARG(0));
2951 }
2952 }
2953 } else {
2954 for (i = 0; i < nb_oargs; i++) {
2955 arg_ts = arg_temp(op->args[i]);
2956 dir_ts = arg_ts->state_ptr;
2957 if (!dir_ts) {
2958 continue;
2959 }
2960 op->args[i] = temp_arg(dir_ts);
2961 changes = true;
2962
2963 /* The output is now live and modified. */
2964 arg_ts->state = 0;
2965
2966 /* Sync outputs upon their last write. */
2967 if (NEED_SYNC_ARG(i)) {
2968 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2969 ? INDEX_op_st_i32
2970 : INDEX_op_st_i64);
2971 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2972
2973 sop->args[0] = temp_arg(dir_ts);
2974 sop->args[1] = temp_arg(arg_ts->mem_base);
2975 sop->args[2] = arg_ts->mem_offset;
2976
2977 arg_ts->state = TS_MEM;
2978 }
2979 /* Drop outputs that are dead. */
2980 if (IS_DEAD_ARG(i)) {
2981 arg_ts->state = TS_DEAD;
2982 }
2983 }
2984 }
2985 }
2986
2987 return changes;
2988 }
2989
2990 #ifdef CONFIG_DEBUG_TCG
2991 static void dump_regs(TCGContext *s)
2992 {
2993 TCGTemp *ts;
2994 int i;
2995 char buf[64];
2996
2997 for(i = 0; i < s->nb_temps; i++) {
2998 ts = &s->temps[i];
2999 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3000 switch(ts->val_type) {
3001 case TEMP_VAL_REG:
3002 printf("%s", tcg_target_reg_names[ts->reg]);
3003 break;
3004 case TEMP_VAL_MEM:
3005 printf("%d(%s)", (int)ts->mem_offset,
3006 tcg_target_reg_names[ts->mem_base->reg]);
3007 break;
3008 case TEMP_VAL_CONST:
3009 printf("$0x%" PRIx64, ts->val);
3010 break;
3011 case TEMP_VAL_DEAD:
3012 printf("D");
3013 break;
3014 default:
3015 printf("???");
3016 break;
3017 }
3018 printf("\n");
3019 }
3020
3021 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3022 if (s->reg_to_temp[i] != NULL) {
3023 printf("%s: %s\n",
3024 tcg_target_reg_names[i],
3025 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3026 }
3027 }
3028 }
3029
3030 static void check_regs(TCGContext *s)
3031 {
3032 int reg;
3033 int k;
3034 TCGTemp *ts;
3035 char buf[64];
3036
3037 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3038 ts = s->reg_to_temp[reg];
3039 if (ts != NULL) {
3040 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3041 printf("Inconsistency for register %s:\n",
3042 tcg_target_reg_names[reg]);
3043 goto fail;
3044 }
3045 }
3046 }
3047 for (k = 0; k < s->nb_temps; k++) {
3048 ts = &s->temps[k];
3049 if (ts->val_type == TEMP_VAL_REG
3050 && ts->kind != TEMP_FIXED
3051 && s->reg_to_temp[ts->reg] != ts) {
3052 printf("Inconsistency for temp %s:\n",
3053 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3054 fail:
3055 printf("reg state:\n");
3056 dump_regs(s);
3057 tcg_abort();
3058 }
3059 }
3060 }
3061 #endif
3062
3063 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3064 {
3065 intptr_t off, size, align;
3066
3067 switch (ts->type) {
3068 case TCG_TYPE_I32:
3069 size = align = 4;
3070 break;
3071 case TCG_TYPE_I64:
3072 case TCG_TYPE_V64:
3073 size = align = 8;
3074 break;
3075 case TCG_TYPE_V128:
3076 size = align = 16;
3077 break;
3078 case TCG_TYPE_V256:
3079 /* Note that we do not require aligned storage for V256. */
3080 size = 32, align = 16;
3081 break;
3082 default:
3083 g_assert_not_reached();
3084 }
3085
3086 /*
3087 * Assume the stack is sufficiently aligned.
3088 * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3089 * and do not require 16 byte vector alignment. This seems slightly
3090 * easier than fully parameterizing the above switch statement.
3091 */
3092 align = MIN(TCG_TARGET_STACK_ALIGN, align);
3093 off = ROUND_UP(s->current_frame_offset, align);
3094
3095 /* If we've exhausted the stack frame, restart with a smaller TB. */
3096 if (off + size > s->frame_end) {
3097 tcg_raise_tb_overflow(s);
3098 }
3099 s->current_frame_offset = off + size;
3100
3101 ts->mem_offset = off;
3102 #if defined(__sparc__)
3103 ts->mem_offset += TCG_TARGET_STACK_BIAS;
3104 #endif
3105 ts->mem_base = s->frame_temp;
3106 ts->mem_allocated = 1;
3107 }
3108
3109 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3110
3111 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3112 mark it free; otherwise mark it dead. */
3113 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3114 {
3115 TCGTempVal new_type;
3116
3117 switch (ts->kind) {
3118 case TEMP_FIXED:
3119 return;
3120 case TEMP_GLOBAL:
3121 case TEMP_LOCAL:
3122 new_type = TEMP_VAL_MEM;
3123 break;
3124 case TEMP_NORMAL:
3125 case TEMP_EBB:
3126 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3127 break;
3128 case TEMP_CONST:
3129 new_type = TEMP_VAL_CONST;
3130 break;
3131 default:
3132 g_assert_not_reached();
3133 }
3134 if (ts->val_type == TEMP_VAL_REG) {
3135 s->reg_to_temp[ts->reg] = NULL;
3136 }
3137 ts->val_type = new_type;
3138 }
3139
3140 /* Mark a temporary as dead. */
3141 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3142 {
3143 temp_free_or_dead(s, ts, 1);
3144 }
3145
3146 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3147 registers needs to be allocated to store a constant. If 'free_or_dead'
3148 is non-zero, subsequently release the temporary; if it is positive, the
3149 temp is dead; if it is negative, the temp is free. */
3150 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3151 TCGRegSet preferred_regs, int free_or_dead)
3152 {
3153 if (!temp_readonly(ts) && !ts->mem_coherent) {
3154 if (!ts->mem_allocated) {
3155 temp_allocate_frame(s, ts);
3156 }
3157 switch (ts->val_type) {
3158 case TEMP_VAL_CONST:
3159 /* If we're going to free the temp immediately, then we won't
3160 require it later in a register, so attempt to store the
3161 constant to memory directly. */
3162 if (free_or_dead
3163 && tcg_out_sti(s, ts->type, ts->val,
3164 ts->mem_base->reg, ts->mem_offset)) {
3165 break;
3166 }
3167 temp_load(s, ts, tcg_target_available_regs[ts->type],
3168 allocated_regs, preferred_regs);
3169 /* fallthrough */
3170
3171 case TEMP_VAL_REG:
3172 tcg_out_st(s, ts->type, ts->reg,
3173 ts->mem_base->reg, ts->mem_offset);
3174 break;
3175
3176 case TEMP_VAL_MEM:
3177 break;
3178
3179 case TEMP_VAL_DEAD:
3180 default:
3181 tcg_abort();
3182 }
3183 ts->mem_coherent = 1;
3184 }
3185 if (free_or_dead) {
3186 temp_free_or_dead(s, ts, free_or_dead);
3187 }
3188 }
3189
3190 /* free register 'reg' by spilling the corresponding temporary if necessary */
3191 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3192 {
3193 TCGTemp *ts = s->reg_to_temp[reg];
3194 if (ts != NULL) {
3195 temp_sync(s, ts, allocated_regs, 0, -1);
3196 }
3197 }
3198
3199 /**
3200 * tcg_reg_alloc:
3201 * @required_regs: Set of registers in which we must allocate.
3202 * @allocated_regs: Set of registers which must be avoided.
3203 * @preferred_regs: Set of registers we should prefer.
3204 * @rev: True if we search the registers in "indirect" order.
3205 *
3206 * The allocated register must be in @required_regs & ~@allocated_regs,
3207 * but if we can put it in @preferred_regs we may save a move later.
3208 */
3209 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3210 TCGRegSet allocated_regs,
3211 TCGRegSet preferred_regs, bool rev)
3212 {
3213 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3214 TCGRegSet reg_ct[2];
3215 const int *order;
3216
3217 reg_ct[1] = required_regs & ~allocated_regs;
3218 tcg_debug_assert(reg_ct[1] != 0);
3219 reg_ct[0] = reg_ct[1] & preferred_regs;
3220
3221 /* Skip the preferred_regs option if it cannot be satisfied,
3222 or if the preference made no difference. */
3223 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3224
3225 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3226
3227 /* Try free registers, preferences first. */
3228 for (j = f; j < 2; j++) {
3229 TCGRegSet set = reg_ct[j];
3230
3231 if (tcg_regset_single(set)) {
3232 /* One register in the set. */
3233 TCGReg reg = tcg_regset_first(set);
3234 if (s->reg_to_temp[reg] == NULL) {
3235 return reg;
3236 }
3237 } else {
3238 for (i = 0; i < n; i++) {
3239 TCGReg reg = order[i];
3240 if (s->reg_to_temp[reg] == NULL &&
3241 tcg_regset_test_reg(set, reg)) {
3242 return reg;
3243 }
3244 }
3245 }
3246 }
3247
3248 /* We must spill something. */
3249 for (j = f; j < 2; j++) {
3250 TCGRegSet set = reg_ct[j];
3251
3252 if (tcg_regset_single(set)) {
3253 /* One register in the set. */
3254 TCGReg reg = tcg_regset_first(set);
3255 tcg_reg_free(s, reg, allocated_regs);
3256 return reg;
3257 } else {
3258 for (i = 0; i < n; i++) {
3259 TCGReg reg = order[i];
3260 if (tcg_regset_test_reg(set, reg)) {
3261 tcg_reg_free(s, reg, allocated_regs);
3262 return reg;
3263 }
3264 }
3265 }
3266 }
3267
3268 tcg_abort();
3269 }
3270
3271 /* Make sure the temporary is in a register. If needed, allocate the register
3272 from DESIRED while avoiding ALLOCATED. */
3273 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3274 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3275 {
3276 TCGReg reg;
3277
3278 switch (ts->val_type) {
3279 case TEMP_VAL_REG:
3280 return;
3281 case TEMP_VAL_CONST:
3282 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3283 preferred_regs, ts->indirect_base);
3284 if (ts->type <= TCG_TYPE_I64) {
3285 tcg_out_movi(s, ts->type, reg, ts->val);
3286 } else {
3287 uint64_t val = ts->val;
3288 MemOp vece = MO_64;
3289
3290 /*
3291 * Find the minimal vector element that matches the constant.
3292 * The targets will, in general, have to do this search anyway,
3293 * do this generically.
3294 */
3295 if (val == dup_const(MO_8, val)) {
3296 vece = MO_8;
3297 } else if (val == dup_const(MO_16, val)) {
3298 vece = MO_16;
3299 } else if (val == dup_const(MO_32, val)) {
3300 vece = MO_32;
3301 }
3302
3303 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3304 }
3305 ts->mem_coherent = 0;
3306 break;
3307 case TEMP_VAL_MEM:
3308 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3309 preferred_regs, ts->indirect_base);
3310 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3311 ts->mem_coherent = 1;
3312 break;
3313 case TEMP_VAL_DEAD:
3314 default:
3315 tcg_abort();
3316 }
3317 ts->reg = reg;
3318 ts->val_type = TEMP_VAL_REG;
3319 s->reg_to_temp[reg] = ts;
3320 }
3321
3322 /* Save a temporary to memory. 'allocated_regs' is used in case a
3323 temporary registers needs to be allocated to store a constant. */
3324 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3325 {
3326 /* The liveness analysis already ensures that globals are back
3327 in memory. Keep an tcg_debug_assert for safety. */
3328 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3329 }
3330
3331 /* save globals to their canonical location and assume they can be
3332 modified be the following code. 'allocated_regs' is used in case a
3333 temporary registers needs to be allocated to store a constant. */
3334 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3335 {
3336 int i, n;
3337
3338 for (i = 0, n = s->nb_globals; i < n; i++) {
3339 temp_save(s, &s->temps[i], allocated_regs);
3340 }
3341 }
3342
3343 /* sync globals to their canonical location and assume they can be
3344 read by the following code. 'allocated_regs' is used in case a
3345 temporary registers needs to be allocated to store a constant. */
3346 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3347 {
3348 int i, n;
3349
3350 for (i = 0, n = s->nb_globals; i < n; i++) {
3351 TCGTemp *ts = &s->temps[i];
3352 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3353 || ts->kind == TEMP_FIXED
3354 || ts->mem_coherent);
3355 }
3356 }
3357
3358 /* at the end of a basic block, we assume all temporaries are dead and
3359 all globals are stored at their canonical location. */
3360 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3361 {
3362 int i;
3363
3364 for (i = s->nb_globals; i < s->nb_temps; i++) {
3365 TCGTemp *ts = &s->temps[i];
3366
3367 switch (ts->kind) {
3368 case TEMP_LOCAL:
3369 temp_save(s, ts, allocated_regs);
3370 break;
3371 case TEMP_NORMAL:
3372 case TEMP_EBB:
3373 /* The liveness analysis already ensures that temps are dead.
3374 Keep an tcg_debug_assert for safety. */
3375 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3376 break;
3377 case TEMP_CONST:
3378 /* Similarly, we should have freed any allocated register. */
3379 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3380 break;
3381 default:
3382 g_assert_not_reached();
3383 }
3384 }
3385
3386 save_globals(s, allocated_regs);
3387 }
3388
3389 /*
3390 * At a conditional branch, we assume all temporaries are dead unless
3391 * explicitly live-across-conditional-branch; all globals and local
3392 * temps are synced to their location.
3393 */
3394 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3395 {
3396 sync_globals(s, allocated_regs);
3397
3398 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3399 TCGTemp *ts = &s->temps[i];
3400 /*
3401 * The liveness analysis already ensures that temps are dead.
3402 * Keep tcg_debug_asserts for safety.
3403 */
3404 switch (ts->kind) {
3405 case TEMP_LOCAL:
3406 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3407 break;
3408 case TEMP_NORMAL:
3409 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3410 break;
3411 case TEMP_EBB:
3412 case TEMP_CONST:
3413 break;
3414 default:
3415 g_assert_not_reached();
3416 }
3417 }
3418 }
3419
3420 /*
3421 * Specialized code generation for INDEX_op_mov_* with a constant.
3422 */
3423 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3424 tcg_target_ulong val, TCGLifeData arg_life,
3425 TCGRegSet preferred_regs)
3426 {
3427 /* ENV should not be modified. */
3428 tcg_debug_assert(!temp_readonly(ots));
3429
3430 /* The movi is not explicitly generated here. */
3431 if (ots->val_type == TEMP_VAL_REG) {
3432 s->reg_to_temp[ots->reg] = NULL;
3433 }
3434 ots->val_type = TEMP_VAL_CONST;
3435 ots->val = val;
3436 ots->mem_coherent = 0;
3437 if (NEED_SYNC_ARG(0)) {
3438 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3439 } else if (IS_DEAD_ARG(0)) {
3440 temp_dead(s, ots);
3441 }
3442 }
3443
3444 /*
3445 * Specialized code generation for INDEX_op_mov_*.
3446 */
3447 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3448 {
3449 const TCGLifeData arg_life = op->life;
3450 TCGRegSet allocated_regs, preferred_regs;
3451 TCGTemp *ts, *ots;
3452 TCGType otype, itype;
3453
3454 allocated_regs = s->reserved_regs;
3455 preferred_regs = op->output_pref[0];
3456 ots = arg_temp(op->args[0]);
3457 ts = arg_temp(op->args[1]);
3458
3459 /* ENV should not be modified. */
3460 tcg_debug_assert(!temp_readonly(ots));
3461
3462 /* Note that otype != itype for no-op truncation. */
3463 otype = ots->type;
3464 itype = ts->type;
3465
3466 if (ts->val_type == TEMP_VAL_CONST) {
3467 /* propagate constant or generate sti */
3468 tcg_target_ulong val = ts->val;
3469 if (IS_DEAD_ARG(1)) {
3470 temp_dead(s, ts);
3471 }
3472 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3473 return;
3474 }
3475
3476 /* If the source value is in memory we're going to be forced
3477 to have it in a register in order to perform the copy. Copy
3478 the SOURCE value into its own register first, that way we
3479 don't have to reload SOURCE the next time it is used. */
3480 if (ts->val_type == TEMP_VAL_MEM) {
3481 temp_load(s, ts, tcg_target_available_regs[itype],
3482 allocated_regs, preferred_regs);
3483 }
3484
3485 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3486 if (IS_DEAD_ARG(0)) {
3487 /* mov to a non-saved dead register makes no sense (even with
3488 liveness analysis disabled). */
3489 tcg_debug_assert(NEED_SYNC_ARG(0));
3490 if (!ots->mem_allocated) {
3491 temp_allocate_frame(s, ots);
3492 }
3493 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3494 if (IS_DEAD_ARG(1)) {
3495 temp_dead(s, ts);
3496 }
3497 temp_dead(s, ots);
3498 } else {
3499 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3500 /* the mov can be suppressed */
3501 if (ots->val_type == TEMP_VAL_REG) {
3502 s->reg_to_temp[ots->reg] = NULL;
3503 }
3504 ots->reg = ts->reg;
3505 temp_dead(s, ts);
3506 } else {
3507 if (ots->val_type != TEMP_VAL_REG) {
3508 /* When allocating a new register, make sure to not spill the
3509 input one. */
3510 tcg_regset_set_reg(allocated_regs, ts->reg);
3511 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3512 allocated_regs, preferred_regs,
3513 ots->indirect_base);
3514 }
3515 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3516 /*
3517 * Cross register class move not supported.
3518 * Store the source register into the destination slot
3519 * and leave the destination temp as TEMP_VAL_MEM.
3520 */
3521 assert(!temp_readonly(ots));
3522 if (!ts->mem_allocated) {
3523 temp_allocate_frame(s, ots);
3524 }
3525 tcg_out_st(s, ts->type, ts->reg,
3526 ots->mem_base->reg, ots->mem_offset);
3527 ots->mem_coherent = 1;
3528 temp_free_or_dead(s, ots, -1);
3529 return;
3530 }
3531 }
3532 ots->val_type = TEMP_VAL_REG;
3533 ots->mem_coherent = 0;
3534 s->reg_to_temp[ots->reg] = ots;
3535 if (NEED_SYNC_ARG(0)) {
3536 temp_sync(s, ots, allocated_regs, 0, 0);
3537 }
3538 }
3539 }
3540
3541 /*
3542 * Specialized code generation for INDEX_op_dup_vec.
3543 */
3544 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3545 {
3546 const TCGLifeData arg_life = op->life;
3547 TCGRegSet dup_out_regs, dup_in_regs;
3548 TCGTemp *its, *ots;
3549 TCGType itype, vtype;
3550 intptr_t endian_fixup;
3551 unsigned vece;
3552 bool ok;
3553
3554 ots = arg_temp(op->args[0]);
3555 its = arg_temp(op->args[1]);
3556
3557 /* ENV should not be modified. */
3558 tcg_debug_assert(!temp_readonly(ots));
3559
3560 itype = its->type;
3561 vece = TCGOP_VECE(op);
3562 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3563
3564 if (its->val_type == TEMP_VAL_CONST) {
3565 /* Propagate constant via movi -> dupi. */
3566 tcg_target_ulong val = its->val;
3567 if (IS_DEAD_ARG(1)) {
3568 temp_dead(s, its);
3569 }
3570 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3571 return;
3572 }
3573
3574 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3575 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3576
3577 /* Allocate the output register now. */
3578 if (ots->val_type != TEMP_VAL_REG) {
3579 TCGRegSet allocated_regs = s->reserved_regs;
3580
3581 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3582 /* Make sure to not spill the input register. */
3583 tcg_regset_set_reg(allocated_regs, its->reg);
3584 }
3585 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3586 op->output_pref[0], ots->indirect_base);
3587 ots->val_type = TEMP_VAL_REG;
3588 ots->mem_coherent = 0;
3589 s->reg_to_temp[ots->reg] = ots;
3590 }
3591
3592 switch (its->val_type) {
3593 case TEMP_VAL_REG:
3594 /*
3595 * The dup constriaints must be broad, covering all possible VECE.
3596 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3597 * to fail, indicating that extra moves are required for that case.
3598 */
3599 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3600 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3601 goto done;
3602 }
3603 /* Try again from memory or a vector input register. */
3604 }
3605 if (!its->mem_coherent) {
3606 /*
3607 * The input register is not synced, and so an extra store
3608 * would be required to use memory. Attempt an integer-vector
3609 * register move first. We do not have a TCGRegSet for this.
3610 */
3611 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3612 break;
3613 }
3614 /* Sync the temp back to its slot and load from there. */
3615 temp_sync(s, its, s->reserved_regs, 0, 0);
3616 }
3617 /* fall through */
3618
3619 case TEMP_VAL_MEM:
3620 #if HOST_BIG_ENDIAN
3621 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3622 endian_fixup -= 1 << vece;
3623 #else
3624 endian_fixup = 0;
3625 #endif
3626 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3627 its->mem_offset + endian_fixup)) {
3628 goto done;
3629 }
3630 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3631 break;
3632
3633 default:
3634 g_assert_not_reached();
3635 }
3636
3637 /* We now have a vector input register, so dup must succeed. */
3638 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3639 tcg_debug_assert(ok);
3640
3641 done:
3642 if (IS_DEAD_ARG(1)) {
3643 temp_dead(s, its);
3644 }
3645 if (NEED_SYNC_ARG(0)) {
3646 temp_sync(s, ots, s->reserved_regs, 0, 0);
3647 }
3648 if (IS_DEAD_ARG(0)) {
3649 temp_dead(s, ots);
3650 }
3651 }
3652
3653 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3654 {
3655 const TCGLifeData arg_life = op->life;
3656 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3657 TCGRegSet i_allocated_regs;
3658 TCGRegSet o_allocated_regs;
3659 int i, k, nb_iargs, nb_oargs;
3660 TCGReg reg;
3661 TCGArg arg;
3662 const TCGArgConstraint *arg_ct;
3663 TCGTemp *ts;
3664 TCGArg new_args[TCG_MAX_OP_ARGS];
3665 int const_args[TCG_MAX_OP_ARGS];
3666
3667 nb_oargs = def->nb_oargs;
3668 nb_iargs = def->nb_iargs;
3669
3670 /* copy constants */
3671 memcpy(new_args + nb_oargs + nb_iargs,
3672 op->args + nb_oargs + nb_iargs,
3673 sizeof(TCGArg) * def->nb_cargs);
3674
3675 i_allocated_regs = s->reserved_regs;
3676 o_allocated_regs = s->reserved_regs;
3677
3678 /* satisfy input constraints */
3679 for (k = 0; k < nb_iargs; k++) {
3680 TCGRegSet i_preferred_regs, o_preferred_regs;
3681
3682 i = def->args_ct[nb_oargs + k].sort_index;
3683 arg = op->args[i];
3684 arg_ct = &def->args_ct[i];
3685 ts = arg_temp(arg);
3686
3687 if (ts->val_type == TEMP_VAL_CONST
3688 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3689 /* constant is OK for instruction */
3690 const_args[i] = 1;
3691 new_args[i] = ts->val;
3692 continue;
3693 }
3694
3695 i_preferred_regs = o_preferred_regs = 0;
3696 if (arg_ct->ialias) {
3697 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3698
3699 /*
3700 * If the input is readonly, then it cannot also be an
3701 * output and aliased to itself. If the input is not
3702 * dead after the instruction, we must allocate a new
3703 * register and move it.
3704 */
3705 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3706 goto allocate_in_reg;
3707 }
3708
3709 /*
3710 * Check if the current register has already been allocated
3711 * for another input aliased to an output.
3712 */
3713 if (ts->val_type == TEMP_VAL_REG) {
3714 reg = ts->reg;
3715 for (int k2 = 0; k2 < k; k2++) {
3716 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3717 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3718 goto allocate_in_reg;
3719 }
3720 }
3721 }
3722 i_preferred_regs = o_preferred_regs;
3723 }
3724
3725 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3726 reg = ts->reg;
3727
3728 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3729 allocate_in_reg:
3730 /*
3731 * Allocate a new register matching the constraint
3732 * and move the temporary register into it.
3733 */
3734 temp_load(s, ts, tcg_target_available_regs[ts->type],
3735 i_allocated_regs, 0);
3736 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3737 o_preferred_regs, ts->indirect_base);
3738 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3739 /*
3740 * Cross register class move not supported. Sync the
3741 * temp back to its slot and load from there.
3742 */
3743 temp_sync(s, ts, i_allocated_regs, 0, 0);
3744 tcg_out_ld(s, ts->type, reg,
3745 ts->mem_base->reg, ts->mem_offset);
3746 }
3747 }
3748 new_args[i] = reg;
3749 const_args[i] = 0;
3750 tcg_regset_set_reg(i_allocated_regs, reg);
3751 }
3752
3753 /* mark dead temporaries and free the associated registers */
3754 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3755 if (IS_DEAD_ARG(i)) {
3756 temp_dead(s, arg_temp(op->args[i]));
3757 }
3758 }
3759
3760 if (def->flags & TCG_OPF_COND_BRANCH) {
3761 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3762 } else if (def->flags & TCG_OPF_BB_END) {
3763 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3764 } else {
3765 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3766 /* XXX: permit generic clobber register list ? */
3767 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3768 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3769 tcg_reg_free(s, i, i_allocated_regs);
3770 }
3771 }
3772 }
3773 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3774 /* sync globals if the op has side effects and might trigger
3775 an exception. */
3776 sync_globals(s, i_allocated_regs);
3777 }
3778
3779 /* satisfy the output constraints */
3780 for(k = 0; k < nb_oargs; k++) {
3781 i = def->args_ct[k].sort_index;
3782 arg = op->args[i];
3783 arg_ct = &def->args_ct[i];
3784 ts = arg_temp(arg);
3785
3786 /* ENV should not be modified. */
3787 tcg_debug_assert(!temp_readonly(ts));
3788
3789 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3790 reg = new_args[arg_ct->alias_index];
3791 } else if (arg_ct->newreg) {
3792 reg = tcg_reg_alloc(s, arg_ct->regs,
3793 i_allocated_regs | o_allocated_regs,
3794 op->output_pref[k], ts->indirect_base);
3795 } else {
3796 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3797 op->output_pref[k], ts->indirect_base);
3798 }
3799 tcg_regset_set_reg(o_allocated_regs, reg);
3800 if (ts->val_type == TEMP_VAL_REG) {
3801 s->reg_to_temp[ts->reg] = NULL;
3802 }
3803 ts->val_type = TEMP_VAL_REG;
3804 ts->reg = reg;
3805 /*
3806 * Temp value is modified, so the value kept in memory is
3807 * potentially not the same.
3808 */
3809 ts->mem_coherent = 0;
3810 s->reg_to_temp[reg] = ts;
3811 new_args[i] = reg;
3812 }
3813 }
3814
3815 /* emit instruction */
3816 if (def->flags & TCG_OPF_VECTOR) {
3817 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3818 new_args, const_args);
3819 } else {
3820 tcg_out_op(s, op->opc, new_args, const_args);
3821 }
3822
3823 /* move the outputs in the correct register if needed */
3824 for(i = 0; i < nb_oargs; i++) {
3825 ts = arg_temp(op->args[i]);
3826
3827 /* ENV should not be modified. */
3828 tcg_debug_assert(!temp_readonly(ts));
3829
3830 if (NEED_SYNC_ARG(i)) {
3831 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3832 } else if (IS_DEAD_ARG(i)) {
3833 temp_dead(s, ts);
3834 }
3835 }
3836 }
3837
3838 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3839 {
3840 const TCGLifeData arg_life = op->life;
3841 TCGTemp *ots, *itsl, *itsh;
3842 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3843
3844 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3845 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3846 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3847
3848 ots = arg_temp(op->args[0]);
3849 itsl = arg_temp(op->args[1]);
3850 itsh = arg_temp(op->args[2]);
3851
3852 /* ENV should not be modified. */
3853 tcg_debug_assert(!temp_readonly(ots));
3854
3855 /* Allocate the output register now. */
3856 if (ots->val_type != TEMP_VAL_REG) {
3857 TCGRegSet allocated_regs = s->reserved_regs;
3858 TCGRegSet dup_out_regs =
3859 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3860
3861 /* Make sure to not spill the input registers. */
3862 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3863 tcg_regset_set_reg(allocated_regs, itsl->reg);
3864 }
3865 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3866 tcg_regset_set_reg(allocated_regs, itsh->reg);
3867 }
3868
3869 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3870 op->output_pref[0], ots->indirect_base);
3871 ots->val_type = TEMP_VAL_REG;
3872 ots->mem_coherent = 0;
3873 s->reg_to_temp[ots->reg] = ots;
3874 }
3875
3876 /* Promote dup2 of immediates to dupi_vec. */
3877 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3878 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3879 MemOp vece = MO_64;
3880
3881 if (val == dup_const(MO_8, val)) {
3882 vece = MO_8;
3883 } else if (val == dup_const(MO_16, val)) {
3884 vece = MO_16;
3885 } else if (val == dup_const(MO_32, val)) {
3886 vece = MO_32;
3887 }
3888
3889 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3890 goto done;
3891 }
3892
3893 /* If the two inputs form one 64-bit value, try dupm_vec. */
3894 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3895 if (!itsl->mem_coherent) {
3896 temp_sync(s, itsl, s->reserved_regs, 0, 0);
3897 }
3898 if (!itsh->mem_coherent) {
3899 temp_sync(s, itsh, s->reserved_regs, 0, 0);
3900 }
3901 #if HOST_BIG_ENDIAN
3902 TCGTemp *its = itsh;
3903 #else
3904 TCGTemp *its = itsl;
3905 #endif
3906 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3907 its->mem_base->reg, its->mem_offset)) {
3908 goto