target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT
[qemu.git] / accel / tcg / cputlb.c
1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace/trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
42
/*
 * Debug switches.  Uncomment DEBUG_TLB to turn on tlb_debug() output and
 * the assert_cpu_is_self() check; additionally uncomment DEBUG_TLB_LOG to
 * route that output to the CPU_LOG_MMU log target.
 */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

/* DEBUG_TLB_GATE: 1 when DEBUG_TLB is defined, 0 otherwise. */
#if defined(DEBUG_TLB)
# define DEBUG_TLB_GATE 1
#else
# define DEBUG_TLB_GATE 0
#endif

/* DEBUG_TLB_LOG_GATE: 1 only when both DEBUG_TLB and DEBUG_TLB_LOG are set. */
#if defined(DEBUG_TLB) && defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_LOG_GATE 0
#endif
58
/*
 * tlb_debug() - printf-style trace helper; every message is prefixed with
 * the name of the calling function.
 *
 * When DEBUG_TLB_LOG_GATE is non-zero the message goes through
 * qemu_log_mask(CPU_LOG_MMU, ...); otherwise, when DEBUG_TLB_GATE is
 * non-zero, it is printed on stderr.  With both gates at 0 nothing is
 * emitted, but the arguments are still seen by the compiler.
 */
#define tlb_debug(fmt, ...) do { \
    if (!DEBUG_TLB_LOG_GATE) { \
        if (DEBUG_TLB_GATE) { \
            fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
        } \
    } else { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } \
} while (0)
67
/*
 * assert_cpu_is_self() - debug-only check (active when DEBUG_TLB_GATE is
 * non-zero) that fails if @cpu has been created and the caller is not
 * running as that cpu.
 */
#define assert_cpu_is_self(cpu) do { \
    if (DEBUG_TLB_GATE) { \
        g_assert(!((cpu)->created && !qemu_cpu_is_self(cpu))); \
    } \
} while (0)
73
74 /* run_on_cpu_data.target_ptr should always be big enough for a
75 * target_ulong even on 32 bit builds */
76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
77
78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
79 */
80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
82
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
84 {
85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
86 }
87
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
89 {
90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
91 }
92
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94 size_t max_entries)
95 {
96 desc->window_begin_ns = ns;
97 desc->window_max_entries = max_entries;
98 }
99
100 /**
101 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
102 * @desc: The CPUTLBDesc portion of the TLB
103 * @fast: The CPUTLBDescFast portion of the same TLB
104 *
105 * Called with tlb_lock_held.
106 *
107 * We have two main constraints when resizing a TLB: (1) we only resize it
108 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
109 * the array or unnecessarily flushing it), which means we do not control how
110 * frequently the resizing can occur; (2) we don't have access to the guest's
111 * future scheduling decisions, and therefore have to decide the magnitude of
112 * the resize based on past observations.
113 *
114 * In general, a memory-hungry process can benefit greatly from an appropriately
115 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
116 * we just have to make the TLB as large as possible; while an oversized TLB
117 * results in minimal TLB miss rates, it also takes longer to be flushed
118 * (flushes can be _very_ frequent), and the reduced locality can also hurt
119 * performance.
120 *
121 * To achieve near-optimal performance for all kinds of workloads, we:
122 *
123 * 1. Aggressively increase the size of the TLB when the use rate of the
124 * TLB being flushed is high, since it is likely that in the near future this
125 * memory-hungry process will execute again, and its memory hungriness will
126 * probably be similar.
127 *
128 * 2. Slowly reduce the size of the TLB as the use rate declines over a
129 * reasonably large time window. The rationale is that if in such a time window
130 * we have not observed a high TLB use rate, it is likely that we won't observe
131 * it in the near future. In that case, once a time window expires we downsize
132 * the TLB to match the maximum use rate observed in the window.
133 *
134 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
135 * since in that range performance is likely near-optimal. Recall that the TLB
136 * is direct mapped, so we want the use rate to be low (or at least not too
137 * high), since otherwise we are likely to have a significant amount of
138 * conflict misses.
139 */
140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
141 int64_t now)
142 {
143 size_t old_size = tlb_n_entries(fast);
144 size_t rate;
145 size_t new_size = old_size;
146 int64_t window_len_ms = 100;
147 int64_t window_len_ns = window_len_ms * 1000 * 1000;
148 bool window_expired = now > desc->window_begin_ns + window_len_ns;
149
150 if (desc->n_used_entries > desc->window_max_entries) {
151 desc->window_max_entries = desc->n_used_entries;
152 }
153 rate = desc->window_max_entries * 100 / old_size;
154
155 if (rate > 70) {
156 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
157 } else if (rate < 30 && window_expired) {
158 size_t ceil = pow2ceil(desc->window_max_entries);
159 size_t expected_rate = desc->window_max_entries * 100 / ceil;
160
161 /*
162 * Avoid undersizing when the max number of entries seen is just below
163 * a pow2. For instance, if max_entries == 1025, the expected use rate
164 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
165 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
166 * later. Thus, make sure that the expected use rate remains below 70%.
167 * (and since we double the size, that means the lowest rate we'd
168 * expect to get is 35%, which is still in the 30-70% range where
169 * we consider that the size is appropriate.)
170 */
171 if (expected_rate > 70) {
172 ceil *= 2;
173 }
174 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
175 }
176
177 if (new_size == old_size) {
178 if (window_expired) {
179 tlb_window_reset(desc, now, desc->n_used_entries);
180 }
181 return;
182 }
183
184 g_free(fast->table);
185 g_free(desc->iotlb);
186
187 tlb_window_reset(desc, now, 0);
188 /* desc->n_used_entries is cleared by the caller */
189 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
190 fast->table = g_try_new(CPUTLBEntry, new_size);
191 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
192
193 /*
194 * If the allocations fail, try smaller sizes. We just freed some
195 * memory, so going back to half of new_size has a good chance of working.
196 * Increased memory pressure elsewhere in the system might cause the
197 * allocations to fail though, so we progressively reduce the allocation
198 * size, aborting if we cannot even allocate the smallest TLB we support.
199 */
200 while (fast->table == NULL || desc->iotlb == NULL) {
201 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
202 error_report("%s: %s", __func__, strerror(errno));
203 abort();
204 }
205 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
206 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
207
208 g_free(fast->table);
209 g_free(desc->iotlb);
210 fast->table = g_try_new(CPUTLBEntry, new_size);
211 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
212 }
213 }
214
215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
216 {
217 desc->n_used_entries = 0;
218 desc->large_page_addr = -1;
219 desc->large_page_mask = -1;
220 desc->vindex = 0;
221 memset(fast->table, -1, sizeof_tlb(fast));
222 memset(desc->vtable, -1, sizeof(desc->vtable));
223 }
224
225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
226 int64_t now)
227 {
228 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
229 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
230
231 tlb_mmu_resize_locked(desc, fast, now);
232 tlb_mmu_flush_locked(desc, fast);
233 }
234
235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
236 {
237 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
238
239 tlb_window_reset(desc, now, 0);
240 desc->n_used_entries = 0;
241 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
242 fast->table = g_new(CPUTLBEntry, n_entries);
243 desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
244 tlb_mmu_flush_locked(desc, fast);
245 }
246
247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
248 {
249 env_tlb(env)->d[mmu_idx].n_used_entries++;
250 }
251
252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
253 {
254 env_tlb(env)->d[mmu_idx].n_used_entries--;
255 }
256
257 void tlb_init(CPUState *cpu)
258 {
259 CPUArchState *env = cpu->env_ptr;
260 int64_t now = get_clock_realtime();
261 int i;
262
263 qemu_spin_init(&env_tlb(env)->c.lock);
264
265 /* All tlbs are initialized flushed. */
266 env_tlb(env)->c.dirty = 0;
267
268 for (i = 0; i < NB_MMU_MODES; i++) {
269 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
270 }
271 }
272
273 void tlb_destroy(CPUState *cpu)
274 {
275 CPUArchState *env = cpu->env_ptr;
276 int i;
277
278 qemu_spin_destroy(&env_tlb(env)->c.lock);
279 for (i = 0; i < NB_MMU_MODES; i++) {
280 CPUTLBDesc *desc = &env_tlb(env)->d[i];
281 CPUTLBDescFast *fast = &env_tlb(env)->f[i];
282
283 g_free(fast->table);
284 g_free(desc->iotlb);
285 }
286 }
287
288 /* flush_all_helper: run fn across all cpus
289 *
290 * If the wait flag is set then the src cpu's helper will be queued as
291 * "safe" work and the loop exited creating a synchronisation point
292 * where all queued work will be finished before execution starts
293 * again.
294 */
295 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
296 run_on_cpu_data d)
297 {
298 CPUState *cpu;
299
300 CPU_FOREACH(cpu) {
301 if (cpu != src) {
302 async_run_on_cpu(cpu, fn, d);
303 }
304 }
305 }
306
307 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
308 {
309 CPUState *cpu;
310 size_t full = 0, part = 0, elide = 0;
311
312 CPU_FOREACH(cpu) {
313 CPUArchState *env = cpu->env_ptr;
314
315 full += atomic_read(&env_tlb(env)->c.full_flush_count);
316 part += atomic_read(&env_tlb(env)->c.part_flush_count);
317 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
318 }
319 *pfull = full;
320 *ppart = part;
321 *pelide = elide;
322 }
323
324 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
325 {
326 CPUArchState *env = cpu->env_ptr;
327 uint16_t asked = data.host_int;
328 uint16_t all_dirty, work, to_clean;
329 int64_t now = get_clock_realtime();
330
331 assert_cpu_is_self(cpu);
332
333 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
334
335 qemu_spin_lock(&env_tlb(env)->c.lock);
336
337 all_dirty = env_tlb(env)->c.dirty;
338 to_clean = asked & all_dirty;
339 all_dirty &= ~to_clean;
340 env_tlb(env)->c.dirty = all_dirty;
341
342 for (work = to_clean; work != 0; work &= work - 1) {
343 int mmu_idx = ctz32(work);
344 tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
345 }
346
347 qemu_spin_unlock(&env_tlb(env)->c.lock);
348
349 cpu_tb_jmp_cache_clear(cpu);
350
351 if (to_clean == ALL_MMUIDX_BITS) {
352 atomic_set(&env_tlb(env)->c.full_flush_count,
353 env_tlb(env)->c.full_flush_count + 1);
354 } else {
355 atomic_set(&env_tlb(env)->c.part_flush_count,
356 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
357 if (to_clean != asked) {
358 atomic_set(&env_tlb(env)->c.elide_flush_count,
359 env_tlb(env)->c.elide_flush_count +
360 ctpop16(asked & ~to_clean));
361 }
362 }
363 }
364
365 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
366 {
367 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
368
369 if (cpu->created && !qemu_cpu_is_self(cpu)) {
370 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
371 RUN_ON_CPU_HOST_INT(idxmap));
372 } else {
373 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
374 }
375 }
376
377 void tlb_flush(CPUState *cpu)
378 {
379 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
380 }
381
382 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
383 {
384 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
385
386 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
387
388 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
389 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
390 }
391
392 void tlb_flush_all_cpus(CPUState *src_cpu)
393 {
394 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
395 }
396
397 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
398 {
399 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
400
401 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
402
403 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
404 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
405 }
406
407 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
408 {
409 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
410 }
411
412 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
413 target_ulong page)
414 {
415 return tlb_hit_page(tlb_entry->addr_read, page) ||
416 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
417 tlb_hit_page(tlb_entry->addr_code, page);
418 }
419
420 /**
421 * tlb_entry_is_empty - return true if the entry is not in use
422 * @te: pointer to CPUTLBEntry
423 */
424 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
425 {
426 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
427 }
428
429 /* Called with tlb_c.lock held */
430 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
431 target_ulong page)
432 {
433 if (tlb_hit_page_anyprot(tlb_entry, page)) {
434 memset(tlb_entry, -1, sizeof(*tlb_entry));
435 return true;
436 }
437 return false;
438 }
439
440 /* Called with tlb_c.lock held */
441 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
442 target_ulong page)
443 {
444 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
445 int k;
446
447 assert_cpu_is_self(env_cpu(env));
448 for (k = 0; k < CPU_VTLB_SIZE; k++) {
449 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
450 tlb_n_used_entries_dec(env, mmu_idx);
451 }
452 }
453 }
454
455 static void tlb_flush_page_locked(CPUArchState *env, int midx,
456 target_ulong page)
457 {
458 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
459 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
460
461 /* Check if we need to flush due to large pages. */
462 if ((page & lp_mask) == lp_addr) {
463 tlb_debug("forcing full flush midx %d ("
464 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
465 midx, lp_addr, lp_mask);
466 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
467 } else {
468 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
469 tlb_n_used_entries_dec(env, midx);
470 }
471 tlb_flush_vtlb_page_locked(env, midx, page);
472 }
473 }
474
475 /**
476 * tlb_flush_page_by_mmuidx_async_0:
477 * @cpu: cpu on which to flush
478 * @addr: page of virtual address to flush
479 * @idxmap: set of mmu_idx to flush
480 *
481 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
482 * at @addr from the tlbs indicated by @idxmap from @cpu.
483 */
484 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
485 target_ulong addr,
486 uint16_t idxmap)
487 {
488 CPUArchState *env = cpu->env_ptr;
489 int mmu_idx;
490
491 assert_cpu_is_self(cpu);
492
493 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
494
495 qemu_spin_lock(&env_tlb(env)->c.lock);
496 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
497 if ((idxmap >> mmu_idx) & 1) {
498 tlb_flush_page_locked(env, mmu_idx, addr);
499 }
500 }
501 qemu_spin_unlock(&env_tlb(env)->c.lock);
502
503 tb_flush_jmp_cache(cpu, addr);
504 }
505
506 /**
507 * tlb_flush_page_by_mmuidx_async_1:
508 * @cpu: cpu on which to flush
509 * @data: encoded addr + idxmap
510 *
511 * Helper for tlb_flush_page_by_mmuidx and friends, called through
512 * async_run_on_cpu. The idxmap parameter is encoded in the page
513 * offset of the target_ptr field. This limits the set of mmu_idx
514 * that can be passed via this method.
515 */
516 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
517 run_on_cpu_data data)
518 {
519 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
520 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
521 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
522
523 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
524 }
525
526 typedef struct {
527 target_ulong addr;
528 uint16_t idxmap;
529 } TLBFlushPageByMMUIdxData;
530
531 /**
532 * tlb_flush_page_by_mmuidx_async_2:
533 * @cpu: cpu on which to flush
534 * @data: allocated addr + idxmap
535 *
536 * Helper for tlb_flush_page_by_mmuidx and friends, called through
537 * async_run_on_cpu. The addr+idxmap parameters are stored in a
538 * TLBFlushPageByMMUIdxData structure that has been allocated
539 * specifically for this helper. Free the structure when done.
540 */
541 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
542 run_on_cpu_data data)
543 {
544 TLBFlushPageByMMUIdxData *d = data.host_ptr;
545
546 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
547 g_free(d);
548 }
549
550 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
551 {
552 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
553
554 /* This should already be page aligned */
555 addr &= TARGET_PAGE_MASK;
556
557 if (qemu_cpu_is_self(cpu)) {
558 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
559 } else if (idxmap < TARGET_PAGE_SIZE) {
560 /*
561 * Most targets have only a few mmu_idx. In the case where
562 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
563 * allocating memory for this operation.
564 */
565 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
566 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
567 } else {
568 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
569
570 /* Otherwise allocate a structure, freed by the worker. */
571 d->addr = addr;
572 d->idxmap = idxmap;
573 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
574 RUN_ON_CPU_HOST_PTR(d));
575 }
576 }
577
578 void tlb_flush_page(CPUState *cpu, target_ulong addr)
579 {
580 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
581 }
582
583 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
584 uint16_t idxmap)
585 {
586 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
587
588 /* This should already be page aligned */
589 addr &= TARGET_PAGE_MASK;
590
591 /*
592 * Allocate memory to hold addr+idxmap only when needed.
593 * See tlb_flush_page_by_mmuidx for details.
594 */
595 if (idxmap < TARGET_PAGE_SIZE) {
596 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
597 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
598 } else {
599 CPUState *dst_cpu;
600
601 /* Allocate a separate data block for each destination cpu. */
602 CPU_FOREACH(dst_cpu) {
603 if (dst_cpu != src_cpu) {
604 TLBFlushPageByMMUIdxData *d
605 = g_new(TLBFlushPageByMMUIdxData, 1);
606
607 d->addr = addr;
608 d->idxmap = idxmap;
609 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
610 RUN_ON_CPU_HOST_PTR(d));
611 }
612 }
613 }
614
615 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
616 }
617
618 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
619 {
620 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
621 }
622
623 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
624 target_ulong addr,
625 uint16_t idxmap)
626 {
627 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
628
629 /* This should already be page aligned */
630 addr &= TARGET_PAGE_MASK;
631
632 /*
633 * Allocate memory to hold addr+idxmap only when needed.
634 * See tlb_flush_page_by_mmuidx for details.
635 */
636 if (idxmap < TARGET_PAGE_SIZE) {
637 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
638 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
639 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
640 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
641 } else {
642 CPUState *dst_cpu;
643 TLBFlushPageByMMUIdxData *d;
644
645 /* Allocate a separate data block for each destination cpu. */
646 CPU_FOREACH(dst_cpu) {
647 if (dst_cpu != src_cpu) {
648 d = g_new(TLBFlushPageByMMUIdxData, 1);
649 d->addr = addr;
650 d->idxmap = idxmap;
651 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
652 RUN_ON_CPU_HOST_PTR(d));
653 }
654 }
655
656 d = g_new(TLBFlushPageByMMUIdxData, 1);
657 d->addr = addr;
658 d->idxmap = idxmap;
659 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
660 RUN_ON_CPU_HOST_PTR(d));
661 }
662 }
663
664 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
665 {
666 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
667 }
668
669 /* update the TLBs so that writes to code in the virtual page 'addr'
670 can be detected */
671 void tlb_protect_code(ram_addr_t ram_addr)
672 {
673 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
674 DIRTY_MEMORY_CODE);
675 }
676
677 /* update the TLB so that writes in physical page 'phys_addr' are no longer
678 tested for self modifying code */
679 void tlb_unprotect_code(ram_addr_t ram_addr)
680 {
681 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
682 }
683
684
685 /*
686 * Dirty write flag handling
687 *
688 * When the TCG code writes to a location it looks up the address in
689 * the TLB and uses that data to compute the final address. If any of
690 * the lower bits of the address are set then the slow path is forced.
691 * There are a number of reasons to do this but for normal RAM the
692 * most usual is detecting writes to code regions which may invalidate
693 * generated code.
694 *
695 * Other vCPUs might be reading their TLBs during guest execution, so we update
696 * te->addr_write with atomic_set. We don't need to worry about this for
697 * oversized guests as MTTCG is disabled for them.
698 *
699 * Called with tlb_c.lock held.
700 */
701 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
702 uintptr_t start, uintptr_t length)
703 {
704 uintptr_t addr = tlb_entry->addr_write;
705
706 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
707 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
708 addr &= TARGET_PAGE_MASK;
709 addr += tlb_entry->addend;
710 if ((addr - start) < length) {
711 #if TCG_OVERSIZED_GUEST
712 tlb_entry->addr_write |= TLB_NOTDIRTY;
713 #else
714 atomic_set(&tlb_entry->addr_write,
715 tlb_entry->addr_write | TLB_NOTDIRTY);
716 #endif
717 }
718 }
719 }
720
721 /*
722 * Called with tlb_c.lock held.
723 * Called only from the vCPU context, i.e. the TLB's owner thread.
724 */
725 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
726 {
727 *d = *s;
728 }
729
730 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
731 * the target vCPU).
732 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
733 * thing actually updated is the target TLB entry ->addr_write flags.
734 */
735 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
736 {
737 CPUArchState *env;
738
739 int mmu_idx;
740
741 env = cpu->env_ptr;
742 qemu_spin_lock(&env_tlb(env)->c.lock);
743 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
744 unsigned int i;
745 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
746
747 for (i = 0; i < n; i++) {
748 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
749 start1, length);
750 }
751
752 for (i = 0; i < CPU_VTLB_SIZE; i++) {
753 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
754 start1, length);
755 }
756 }
757 qemu_spin_unlock(&env_tlb(env)->c.lock);
758 }
759
760 /* Called with tlb_c.lock held */
761 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
762 target_ulong vaddr)
763 {
764 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
765 tlb_entry->addr_write = vaddr;
766 }
767 }
768
769 /* update the TLB corresponding to virtual page vaddr
770 so that it is no longer dirty */
771 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
772 {
773 CPUArchState *env = cpu->env_ptr;
774 int mmu_idx;
775
776 assert_cpu_is_self(cpu);
777
778 vaddr &= TARGET_PAGE_MASK;
779 qemu_spin_lock(&env_tlb(env)->c.lock);
780 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
781 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
782 }
783
784 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
785 int k;
786 for (k = 0; k < CPU_VTLB_SIZE; k++) {
787 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
788 }
789 }
790 qemu_spin_unlock(&env_tlb(env)->c.lock);
791 }
792
793 /* Our TLB does not support large pages, so remember the area covered by
794 large pages and trigger a full TLB flush if these are invalidated. */
795 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
796 target_ulong vaddr, target_ulong size)
797 {
798 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
799 target_ulong lp_mask = ~(size - 1);
800
801 if (lp_addr == (target_ulong)-1) {
802 /* No previous large page. */
803 lp_addr = vaddr;
804 } else {
805 /* Extend the existing region to include the new page.
806 This is a compromise between unnecessary flushes and
807 the cost of maintaining a full variable size TLB. */
808 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
809 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
810 lp_mask <<= 1;
811 }
812 }
813 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
814 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
815 }
816
817 /* Add a new TLB entry. At most one entry for a given virtual address
818 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
819 * supplied size is only used by tlb_flush_page.
820 *
821 * Called from TCG-generated code, which is under an RCU read-side
822 * critical section.
823 */
824 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
825 hwaddr paddr, MemTxAttrs attrs, int prot,
826 int mmu_idx, target_ulong size)
827 {
828 CPUArchState *env = cpu->env_ptr;
829 CPUTLB *tlb = env_tlb(env);
830 CPUTLBDesc *desc = &tlb->d[mmu_idx];
831 MemoryRegionSection *section;
832 unsigned int index;
833 target_ulong address;
834 target_ulong write_address;
835 uintptr_t addend;
836 CPUTLBEntry *te, tn;
837 hwaddr iotlb, xlat, sz, paddr_page;
838 target_ulong vaddr_page;
839 int asidx = cpu_asidx_from_attrs(cpu, attrs);
840 int wp_flags;
841 bool is_ram, is_romd;
842
843 assert_cpu_is_self(cpu);
844
845 if (size <= TARGET_PAGE_SIZE) {
846 sz = TARGET_PAGE_SIZE;
847 } else {
848 tlb_add_large_page(env, mmu_idx, vaddr, size);
849 sz = size;
850 }
851 vaddr_page = vaddr & TARGET_PAGE_MASK;
852 paddr_page = paddr & TARGET_PAGE_MASK;
853
854 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
855 &xlat, &sz, attrs, &prot);
856 assert(sz >= TARGET_PAGE_SIZE);
857
858 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
859 " prot=%x idx=%d\n",
860 vaddr, paddr, prot, mmu_idx);
861
862 address = vaddr_page;
863 if (size < TARGET_PAGE_SIZE) {
864 /* Repeat the MMU check and TLB fill on every access. */
865 address |= TLB_INVALID_MASK;
866 }
867 if (attrs.byte_swap) {
868 address |= TLB_BSWAP;
869 }
870
871 is_ram = memory_region_is_ram(section->mr);
872 is_romd = memory_region_is_romd(section->mr);
873
874 if (is_ram || is_romd) {
875 /* RAM and ROMD both have associated host memory. */
876 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
877 } else {
878 /* I/O does not; force the host address to NULL. */
879 addend = 0;
880 }
881
882 write_address = address;
883 if (is_ram) {
884 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
885 /*
886 * Computing is_clean is expensive; avoid all that unless
887 * the page is actually writable.
888 */
889 if (prot & PAGE_WRITE) {
890 if (section->readonly) {
891 write_address |= TLB_DISCARD_WRITE;
892 } else if (cpu_physical_memory_is_clean(iotlb)) {
893 write_address |= TLB_NOTDIRTY;
894 }
895 }
896 } else {
897 /* I/O or ROMD */
898 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
899 /*
900 * Writes to romd devices must go through MMIO to enable write.
901 * Reads to romd devices go through the ram_ptr found above,
902 * but of course reads to I/O must go through MMIO.
903 */
904 write_address |= TLB_MMIO;
905 if (!is_romd) {
906 address = write_address;
907 }
908 }
909
910 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
911 TARGET_PAGE_SIZE);
912
913 index = tlb_index(env, mmu_idx, vaddr_page);
914 te = tlb_entry(env, mmu_idx, vaddr_page);
915
916 /*
917 * Hold the TLB lock for the rest of the function. We could acquire/release
918 * the lock several times in the function, but it is faster to amortize the
919 * acquisition cost by acquiring it just once. Note that this leads to
920 * a longer critical section, but this is not a concern since the TLB lock
921 * is unlikely to be contended.
922 */
923 qemu_spin_lock(&tlb->c.lock);
924
925 /* Note that the tlb is no longer clean. */
926 tlb->c.dirty |= 1 << mmu_idx;
927
928 /* Make sure there's no cached translation for the new page. */
929 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
930
931 /*
932 * Only evict the old entry to the victim tlb if it's for a
933 * different page; otherwise just overwrite the stale data.
934 */
935 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
936 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
937 CPUTLBEntry *tv = &desc->vtable[vidx];
938
939 /* Evict the old entry into the victim tlb. */
940 copy_tlb_helper_locked(tv, te);
941 desc->viotlb[vidx] = desc->iotlb[index];
942 tlb_n_used_entries_dec(env, mmu_idx);
943 }
944
945 /* refill the tlb */
946 /*
947 * At this point iotlb contains a physical section number in the lower
948 * TARGET_PAGE_BITS, and either
949 * + the ram_addr_t of the page base of the target RAM (RAM)
950 * + the offset within section->mr of the page base (I/O, ROMD)
951 * We subtract the vaddr_page (which is page aligned and thus won't
952 * disturb the low bits) to give an offset which can be added to the
953 * (non-page-aligned) vaddr of the eventual memory access to get
954 * the MemoryRegion offset for the access. Note that the vaddr we
955 * subtract here is that of the page base, and not the same as the
956 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
957 */
958 desc->iotlb[index].addr = iotlb - vaddr_page;
959 desc->iotlb[index].attrs = attrs;
960
961 /* Now calculate the new entry */
962 tn.addend = addend - vaddr_page;
963 if (prot & PAGE_READ) {
964 tn.addr_read = address;
965 if (wp_flags & BP_MEM_READ) {
966 tn.addr_read |= TLB_WATCHPOINT;
967 }
968 } else {
969 tn.addr_read = -1;
970 }
971
972 if (prot & PAGE_EXEC) {
973 tn.addr_code = address;
974 } else {
975 tn.addr_code = -1;
976 }
977
978 tn.addr_write = -1;
979 if (prot & PAGE_WRITE) {
980 tn.addr_write = write_address;
981 if (prot & PAGE_WRITE_INV) {
982 tn.addr_write |= TLB_INVALID_MASK;
983 }
984 if (wp_flags & BP_MEM_WRITE) {
985 tn.addr_write |= TLB_WATCHPOINT;
986 }
987 }
988
989 copy_tlb_helper_locked(te, &tn);
990 tlb_n_used_entries_inc(env, mmu_idx);
991 qemu_spin_unlock(&tlb->c.lock);
992 }
993
994 /* Add a new TLB entry, but without specifying the memory
995 * transaction attributes to be used.
996 */
997 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
998 hwaddr paddr, int prot,
999 int mmu_idx, target_ulong size)
1000 {
1001 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
1002 prot, mmu_idx, size);
1003 }
1004
1005 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1006 {
1007 ram_addr_t ram_addr;
1008
1009 ram_addr = qemu_ram_addr_from_host(ptr);
1010 if (ram_addr == RAM_ADDR_INVALID) {
1011 error_report("Bad ram pointer %p", ptr);
1012 abort();
1013 }
1014 return ram_addr;
1015 }
1016
1017 /*
1018 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1019 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1020 * be discarded and looked up again (e.g. via tlb_entry()).
1021 */
1022 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1023 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1024 {
1025 CPUClass *cc = CPU_GET_CLASS(cpu);
1026 bool ok;
1027
1028 /*
1029 * This is not a probe, so only valid return is success; failure
1030 * should result in exception + longjmp to the cpu loop.
1031 */
1032 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1033 assert(ok);
1034 }
1035
1036 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1037 int mmu_idx, target_ulong addr, uintptr_t retaddr,
1038 MMUAccessType access_type, MemOp op)
1039 {
1040 CPUState *cpu = env_cpu(env);
1041 hwaddr mr_offset;
1042 MemoryRegionSection *section;
1043 MemoryRegion *mr;
1044 uint64_t val;
1045 bool locked = false;
1046 MemTxResult r;
1047
1048 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1049 mr = section->mr;
1050 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1051 cpu->mem_io_pc = retaddr;
1052 if (!cpu->can_do_io) {
1053 cpu_io_recompile(cpu, retaddr);
1054 }
1055
1056 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1057 qemu_mutex_lock_iothread();
1058 locked = true;
1059 }
1060 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1061 if (r != MEMTX_OK) {
1062 hwaddr physaddr = mr_offset +
1063 section->offset_within_address_space -
1064 section->offset_within_region;
1065
1066 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1067 mmu_idx, iotlbentry->attrs, r, retaddr);
1068 }
1069 if (locked) {
1070 qemu_mutex_unlock_iothread();
1071 }
1072
1073 return val;
1074 }
1075
1076 /*
1077 * Save a potentially trashed IOTLB entry for later lookup by plugin.
1078 * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
1079 * because of the side effect of io_writex changing memory layout.
1080 */
1081 static void save_iotlb_data(CPUState *cs, hwaddr addr,
1082 MemoryRegionSection *section, hwaddr mr_offset)
1083 {
1084 #ifdef CONFIG_PLUGIN
1085 SavedIOTLB *saved = &cs->saved_iotlb;
1086 saved->addr = addr;
1087 saved->section = section;
1088 saved->mr_offset = mr_offset;
1089 #endif
1090 }
1091
1092 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1093 int mmu_idx, uint64_t val, target_ulong addr,
1094 uintptr_t retaddr, MemOp op)
1095 {
1096 CPUState *cpu = env_cpu(env);
1097 hwaddr mr_offset;
1098 MemoryRegionSection *section;
1099 MemoryRegion *mr;
1100 bool locked = false;
1101 MemTxResult r;
1102
1103 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1104 mr = section->mr;
1105 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1106 if (!cpu->can_do_io) {
1107 cpu_io_recompile(cpu, retaddr);
1108 }
1109 cpu->mem_io_pc = retaddr;
1110
1111 /*
1112 * The memory_region_dispatch may trigger a flush/resize
1113 * so for plugins we save the iotlb_data just in case.
1114 */
1115 save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
1116
1117 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1118 qemu_mutex_lock_iothread();
1119 locked = true;
1120 }
1121 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1122 if (r != MEMTX_OK) {
1123 hwaddr physaddr = mr_offset +
1124 section->offset_within_address_space -
1125 section->offset_within_region;
1126
1127 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1128 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1129 retaddr);
1130 }
1131 if (locked) {
1132 qemu_mutex_unlock_iothread();
1133 }
1134 }
1135
1136 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1137 {
1138 #if TCG_OVERSIZED_GUEST
1139 return *(target_ulong *)((uintptr_t)entry + ofs);
1140 #else
1141 /* ofs might correspond to .addr_write, so use atomic_read */
1142 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1143 #endif
1144 }
1145
1146 /* Return true if ADDR is present in the victim tlb, and has been copied
1147 back to the main tlb. */
1148 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1149 size_t elt_ofs, target_ulong page)
1150 {
1151 size_t vidx;
1152
1153 assert_cpu_is_self(env_cpu(env));
1154 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1155 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1156 target_ulong cmp;
1157
1158 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1159 #if TCG_OVERSIZED_GUEST
1160 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1161 #else
1162 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1163 #endif
1164
1165 if (cmp == page) {
1166 /* Found entry in victim tlb, swap tlb and iotlb. */
1167 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1168
1169 qemu_spin_lock(&env_tlb(env)->c.lock);
1170 copy_tlb_helper_locked(&tmptlb, tlb);
1171 copy_tlb_helper_locked(tlb, vtlb);
1172 copy_tlb_helper_locked(vtlb, &tmptlb);
1173 qemu_spin_unlock(&env_tlb(env)->c.lock);
1174
1175 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1176 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1177 tmpio = *io; *io = *vio; *vio = tmpio;
1178 return true;
1179 }
1180 }
1181 return false;
1182 }
1183
/*
 * Shorthand for victim_tlb_hit() that picks up env, mmu_idx and index
 * from the enclosing scope, uses CPUTLBEntry field TY as the comparator
 * and masks ADDR down to its page.
 */
#define VICTIM_TLB_HIT(TY, ADDR) \
    victim_tlb_hit(env, mmu_idx, index, \
                   offsetof(CPUTLBEntry, TY), \
                   (ADDR) & TARGET_PAGE_MASK)
1188
1189 /*
1190 * Return a ram_addr_t for the virtual address for execution.
1191 *
1192 * Return -1 if we can't translate and execute from an entire page
1193 * of RAM. This will force us to execute by loading and translating
1194 * one insn at a time, without caching.
1195 *
1196 * NOTE: This function will trigger an exception if the page is
1197 * not executable.
1198 */
1199 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1200 void **hostp)
1201 {
1202 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1203 uintptr_t index = tlb_index(env, mmu_idx, addr);
1204 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1205 void *p;
1206
1207 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1208 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1209 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1210 index = tlb_index(env, mmu_idx, addr);
1211 entry = tlb_entry(env, mmu_idx, addr);
1212
1213 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1214 /*
1215 * The MMU protection covers a smaller range than a target
1216 * page, so we must redo the MMU check for every insn.
1217 */
1218 return -1;
1219 }
1220 }
1221 assert(tlb_hit(entry->addr_code, addr));
1222 }
1223
1224 if (unlikely(entry->addr_code & TLB_MMIO)) {
1225 /* The region is not backed by RAM. */
1226 if (hostp) {
1227 *hostp = NULL;
1228 }
1229 return -1;
1230 }
1231
1232 p = (void *)((uintptr_t)addr + entry->addend);
1233 if (hostp) {
1234 *hostp = p;
1235 }
1236 return qemu_ram_addr_from_host_nofail(p);
1237 }
1238
1239 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1240 {
1241 return get_page_addr_code_hostp(env, addr, NULL);
1242 }
1243
1244 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1245 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1246 {
1247 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1248
1249 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1250
1251 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1252 struct page_collection *pages
1253 = page_collection_lock(ram_addr, ram_addr + size);
1254 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1255 page_collection_unlock(pages);
1256 }
1257
1258 /*
1259 * Set both VGA and migration bits for simplicity and to remove
1260 * the notdirty callback faster.
1261 */
1262 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1263
1264 /* We remove the notdirty callback only if the code has been flushed. */
1265 if (!cpu_physical_memory_is_clean(ram_addr)) {
1266 trace_memory_notdirty_set_dirty(mem_vaddr);
1267 tlb_set_dirty(cpu, mem_vaddr);
1268 }
1269 }
1270
1271 static int probe_access_internal(CPUArchState *env, target_ulong addr,
1272 int fault_size, MMUAccessType access_type,
1273 int mmu_idx, bool nonfault,
1274 void **phost, uintptr_t retaddr)
1275 {
1276 uintptr_t index = tlb_index(env, mmu_idx, addr);
1277 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1278 target_ulong tlb_addr, page_addr;
1279 size_t elt_ofs;
1280 int flags;
1281
1282 switch (access_type) {
1283 case MMU_DATA_LOAD:
1284 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1285 break;
1286 case MMU_DATA_STORE:
1287 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1288 break;
1289 case MMU_INST_FETCH:
1290 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1291 break;
1292 default:
1293 g_assert_not_reached();
1294 }
1295 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1296
1297 page_addr = addr & TARGET_PAGE_MASK;
1298 if (!tlb_hit_page(tlb_addr, page_addr)) {
1299 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
1300 CPUState *cs = env_cpu(env);
1301 CPUClass *cc = CPU_GET_CLASS(cs);
1302
1303 if (!cc->tlb_fill(cs, addr, fault_size, access_type,
1304 mmu_idx, nonfault, retaddr)) {
1305 /* Non-faulting page table read failed. */
1306 *phost = NULL;
1307 return TLB_INVALID_MASK;
1308 }
1309
1310 /* TLB resize via tlb_fill may have moved the entry. */
1311 entry = tlb_entry(env, mmu_idx, addr);
1312 }
1313 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1314 }
1315 flags = tlb_addr & TLB_FLAGS_MASK;
1316
1317 /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
1318 if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1319 *phost = NULL;
1320 return TLB_MMIO;
1321 }
1322
1323 /* Everything else is RAM. */
1324 *phost = (void *)((uintptr_t)addr + entry->addend);
1325 return flags;
1326 }
1327
1328 int probe_access_flags(CPUArchState *env, target_ulong addr,
1329 MMUAccessType access_type, int mmu_idx,
1330 bool nonfault, void **phost, uintptr_t retaddr)
1331 {
1332 int flags;
1333
1334 flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
1335 nonfault, phost, retaddr);
1336
1337 /* Handle clean RAM pages. */
1338 if (unlikely(flags & TLB_NOTDIRTY)) {
1339 uintptr_t index = tlb_index(env, mmu_idx, addr);
1340 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1341
1342 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1343 flags &= ~TLB_NOTDIRTY;
1344 }
1345
1346 return flags;
1347 }
1348
1349 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1350 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1351 {
1352 void *host;
1353 int flags;
1354
1355 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1356
1357 flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
1358 false, &host, retaddr);
1359
1360 /* Per the interface, size == 0 merely faults the access. */
1361 if (size == 0) {
1362 return NULL;
1363 }
1364
1365 if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1366 uintptr_t index = tlb_index(env, mmu_idx, addr);
1367 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1368
1369 /* Handle watchpoints. */
1370 if (flags & TLB_WATCHPOINT) {
1371 int wp_access = (access_type == MMU_DATA_STORE
1372 ? BP_MEM_WRITE : BP_MEM_READ);
1373 cpu_check_watchpoint(env_cpu(env), addr, size,
1374 iotlbentry->attrs, wp_access, retaddr);
1375 }
1376
1377 /* Handle clean RAM pages. */
1378 if (flags & TLB_NOTDIRTY) {
1379 notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
1380 }
1381 }
1382
1383 return host;
1384 }
1385
1386 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1387 MMUAccessType access_type, int mmu_idx)
1388 {
1389 void *host;
1390 int flags;
1391
1392 flags = probe_access_internal(env, addr, 0, access_type,
1393 mmu_idx, true, &host, 0);
1394
1395 /* No combination of flags are expected by the caller. */
1396 return flags ? NULL : host;
1397 }
1398
1399 #ifdef CONFIG_PLUGIN
1400 /*
1401 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1402 * This should be a hot path as we will have just looked this path up
1403 * in the softmmu lookup code (or helper). We don't handle re-fills or
1404 * checking the victim table. This is purely informational.
1405 *
1406 * This almost never fails as the memory access being instrumented
1407 * should have just filled the TLB. The one corner case is io_writex
1408 * which can cause TLB flushes and potential resizing of the TLBs
1409 * losing the information we need. In those cases we need to recover
1410 * data from a copy of the iotlbentry. As long as this always occurs
1411 * from the same thread (which a mem callback will be) this is safe.
1412 */
1413
1414 bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
1415 bool is_store, struct qemu_plugin_hwaddr *data)
1416 {
1417 CPUArchState *env = cpu->env_ptr;
1418 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1419 uintptr_t index = tlb_index(env, mmu_idx, addr);
1420 target_ulong tlb_addr = is_store ? tlb_addr_write(tlbe) : tlbe->addr_read;
1421
1422 if (likely(tlb_hit(tlb_addr, addr))) {
1423 /* We must have an iotlb entry for MMIO */
1424 if (tlb_addr & TLB_MMIO) {
1425 CPUIOTLBEntry *iotlbentry;
1426 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1427 data->is_io = true;
1428 data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1429 data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1430 } else {
1431 data->is_io = false;
1432 data->v.ram.hostaddr = addr + tlbe->addend;
1433 }
1434 return true;
1435 } else {
1436 SavedIOTLB *saved = &cpu->saved_iotlb;
1437 data->is_io = true;
1438 data->v.io.section = saved->section;
1439 data->v.io.offset = saved->mr_offset;
1440 return true;
1441 }
1442 }
1443
1444 #endif
1445
1446 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1447 * operations, or io operations to proceed. Return the host address. */
1448 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1449 TCGMemOpIdx oi, uintptr_t retaddr)
1450 {
1451 size_t mmu_idx = get_mmuidx(oi);
1452 uintptr_t index = tlb_index(env, mmu_idx, addr);
1453 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1454 target_ulong tlb_addr = tlb_addr_write(tlbe);
1455 MemOp mop = get_memop(oi);
1456 int a_bits = get_alignment_bits(mop);
1457 int s_bits = mop & MO_SIZE;
1458 void *hostaddr;
1459
1460 /* Adjust the given return address. */
1461 retaddr -= GETPC_ADJ;
1462
1463 /* Enforce guest required alignment. */
1464 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1465 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1466 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1467 mmu_idx, retaddr);
1468 }
1469
1470 /* Enforce qemu required alignment. */
1471 if (unlikely(addr & ((1 << s_bits) - 1))) {
1472 /* We get here if guest alignment was not requested,
1473 or was not enforced by cpu_unaligned_access above.
1474 We might widen the access and emulate, but for now
1475 mark an exception and exit the cpu loop. */
1476 goto stop_the_world;
1477 }
1478
1479 /* Check TLB entry and enforce page permissions. */
1480 if (!tlb_hit(tlb_addr, addr)) {
1481 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1482 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1483 mmu_idx, retaddr);
1484 index = tlb_index(env, mmu_idx, addr);
1485 tlbe = tlb_entry(env, mmu_idx, addr);
1486 }
1487 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1488 }
1489
1490 /* Notice an IO access or a needs-MMU-lookup access */
1491 if (unlikely(tlb_addr & TLB_MMIO)) {
1492 /* There's really nothing that can be done to
1493 support this apart from stop-the-world. */
1494 goto stop_the_world;
1495 }
1496
1497 /* Let the guest notice RMW on a write-only page. */
1498 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1499 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1500 mmu_idx, retaddr);
1501 /* Since we don't support reads and writes to different addresses,
1502 and we do have the proper page loaded for write, this shouldn't
1503 ever return. But just in case, handle via stop-the-world. */
1504 goto stop_the_world;
1505 }
1506
1507 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1508
1509 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1510 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1511 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1512 }
1513
1514 return hostaddr;
1515
1516 stop_the_world:
1517 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1518 }
1519
1520 /*
1521 * Load Helpers
1522 *
1523 * We support two different access types. SOFTMMU_CODE_ACCESS is
1524 * specifically for reading instructions from system memory. It is
1525 * called by the translation loop and in some helpers where the code
1526 * is disassembled. It shouldn't be called directly by guest code.
1527 */
1528
1529 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1530 TCGMemOpIdx oi, uintptr_t retaddr);
1531
1532 static inline uint64_t QEMU_ALWAYS_INLINE
1533 load_memop(const void *haddr, MemOp op)
1534 {
1535 switch (op) {
1536 case MO_UB:
1537 return ldub_p(haddr);
1538 case MO_BEUW:
1539 return lduw_be_p(haddr);
1540 case MO_LEUW:
1541 return lduw_le_p(haddr);
1542 case MO_BEUL:
1543 return (uint32_t)ldl_be_p(haddr);
1544 case MO_LEUL:
1545 return (uint32_t)ldl_le_p(haddr);
1546 case MO_BEQ:
1547 return ldq_be_p(haddr);
1548 case MO_LEQ:
1549 return ldq_le_p(haddr);
1550 default:
1551 qemu_build_not_reached();
1552 }
1553 }
1554
1555 static inline uint64_t QEMU_ALWAYS_INLINE
1556 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1557 uintptr_t retaddr, MemOp op, bool code_read,
1558 FullLoadHelper *full_load)
1559 {
1560 uintptr_t mmu_idx = get_mmuidx(oi);
1561 uintptr_t index = tlb_index(env, mmu_idx, addr);
1562 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1563 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1564 const size_t tlb_off = code_read ?
1565 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1566 const MMUAccessType access_type =
1567 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1568 unsigned a_bits = get_alignment_bits(get_memop(oi));
1569 void *haddr;
1570 uint64_t res;
1571 size_t size = memop_size(op);
1572
1573 /* Handle CPU specific unaligned behaviour */
1574 if (addr & ((1 << a_bits) - 1)) {
1575 cpu_unaligned_access(env_cpu(env), addr, access_type,
1576 mmu_idx, retaddr);
1577 }
1578
1579 /* If the TLB entry is for a different page, reload and try again. */
1580 if (!tlb_hit(tlb_addr, addr)) {
1581 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1582 addr & TARGET_PAGE_MASK)) {
1583 tlb_fill(env_cpu(env), addr, size,
1584 access_type, mmu_idx, retaddr);
1585 index = tlb_index(env, mmu_idx, addr);
1586 entry = tlb_entry(env, mmu_idx, addr);
1587 }
1588 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1589 tlb_addr &= ~TLB_INVALID_MASK;
1590 }
1591
1592 /* Handle anything that isn't just a straight memory access. */
1593 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1594 CPUIOTLBEntry *iotlbentry;
1595 bool need_swap;
1596
1597 /* For anything that is unaligned, recurse through full_load. */
1598 if ((addr & (size - 1)) != 0) {
1599 goto do_unaligned_access;
1600 }
1601
1602 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1603
1604 /* Handle watchpoints. */
1605 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1606 /* On watchpoint hit, this will longjmp out. */
1607 cpu_check_watchpoint(env_cpu(env), addr, size,
1608 iotlbentry->attrs, BP_MEM_READ, retaddr);
1609 }
1610
1611 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1612
1613 /* Handle I/O access. */
1614 if (likely(tlb_addr & TLB_MMIO)) {
1615 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1616 access_type, op ^ (need_swap * MO_BSWAP));
1617 }
1618
1619 haddr = (void *)((uintptr_t)addr + entry->addend);
1620
1621 /*
1622 * Keep these two load_memop separate to ensure that the compiler
1623 * is able to fold the entire function to a single instruction.
1624 * There is a build-time assert inside to remind you of this. ;-)
1625 */
1626 if (unlikely(need_swap)) {
1627 return load_memop(haddr, op ^ MO_BSWAP);
1628 }
1629 return load_memop(haddr, op);
1630 }
1631
1632 /* Handle slow unaligned access (it spans two pages or IO). */
1633 if (size > 1
1634 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1635 >= TARGET_PAGE_SIZE)) {
1636 target_ulong addr1, addr2;
1637 uint64_t r1, r2;
1638 unsigned shift;
1639 do_unaligned_access:
1640 addr1 = addr & ~((target_ulong)size - 1);
1641 addr2 = addr1 + size;
1642 r1 = full_load(env, addr1, oi, retaddr);
1643 r2 = full_load(env, addr2, oi, retaddr);
1644 shift = (addr & (size - 1)) * 8;
1645
1646 if (memop_big_endian(op)) {
1647 /* Big-endian combine. */
1648 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1649 } else {
1650 /* Little-endian combine. */
1651 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1652 }
1653 return res & MAKE_64BIT_MASK(0, size * 8);
1654 }
1655
1656 haddr = (void *)((uintptr_t)addr + entry->addend);
1657 return load_memop(haddr, op);
1658 }
1659
1660 /*
1661 * For the benefit of TCG generated code, we want to avoid the
1662 * complication of ABI-specific return type promotion and always
1663 * return a value extended to the register size of the host. This is
1664 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1665 * data, and for that we always have uint64_t.
1666 *
1667 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1668 */
1669
1670 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1671 TCGMemOpIdx oi, uintptr_t retaddr)
1672 {
1673 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1674 }
1675
1676 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1677 TCGMemOpIdx oi, uintptr_t retaddr)
1678 {
1679 return full_ldub_mmu(env, addr, oi, retaddr);
1680 }
1681
1682 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1683 TCGMemOpIdx oi, uintptr_t retaddr)
1684 {
1685 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1686 full_le_lduw_mmu);
1687 }
1688
1689 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1690 TCGMemOpIdx oi, uintptr_t retaddr)
1691 {
1692 return full_le_lduw_mmu(env, addr, oi, retaddr);
1693 }
1694
1695 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1696 TCGMemOpIdx oi, uintptr_t retaddr)
1697 {
1698 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1699 full_be_lduw_mmu);
1700 }
1701
1702 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1703 TCGMemOpIdx oi, uintptr_t retaddr)
1704 {
1705 return full_be_lduw_mmu(env, addr, oi, retaddr);
1706 }
1707
1708 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1709 TCGMemOpIdx oi, uintptr_t retaddr)
1710 {
1711 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1712 full_le_ldul_mmu);
1713 }
1714
1715 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1716 TCGMemOpIdx oi, uintptr_t retaddr)
1717 {
1718 return full_le_ldul_mmu(env, addr, oi, retaddr);
1719 }
1720
1721 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1722 TCGMemOpIdx oi, uintptr_t retaddr)
1723 {
1724 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1725 full_be_ldul_mmu);
1726 }
1727
1728 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1729 TCGMemOpIdx oi, uintptr_t retaddr)
1730 {
1731 return full_be_ldul_mmu(env, addr, oi, retaddr);
1732 }
1733
1734 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1735 TCGMemOpIdx oi, uintptr_t retaddr)
1736 {
1737 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1738 helper_le_ldq_mmu);
1739 }
1740
1741 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1742 TCGMemOpIdx oi, uintptr_t retaddr)
1743 {
1744 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1745 helper_be_ldq_mmu);
1746 }
1747
1748 /*
1749 * Provide signed versions of the load routines as well. We can of course
1750 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1751 */
1752
1753
1754 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1755 TCGMemOpIdx oi, uintptr_t retaddr)
1756 {
1757 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1758 }
1759
1760 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1761 TCGMemOpIdx oi, uintptr_t retaddr)
1762 {
1763 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1764 }
1765
1766 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1767 TCGMemOpIdx oi, uintptr_t retaddr)
1768 {
1769 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1770 }
1771
1772 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1773 TCGMemOpIdx oi, uintptr_t retaddr)
1774 {
1775 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1776 }
1777
1778 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1779 TCGMemOpIdx oi, uintptr_t retaddr)
1780 {
1781 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1782 }
1783
1784 /*
1785 * Load helpers for cpu_ldst.h.
1786 */
1787
1788 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1789 int mmu_idx, uintptr_t retaddr,
1790 MemOp op, FullLoadHelper *full_load)
1791 {
1792 uint16_t meminfo;
1793 TCGMemOpIdx oi;
1794 uint64_t ret;
1795
1796 meminfo = trace_mem_get_info(op, mmu_idx, false);
1797 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1798
1799 op &= ~MO_SIGN;
1800 oi = make_memop_idx(op, mmu_idx);
1801 ret = full_load(env, addr, oi, retaddr);
1802
1803 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1804
1805 return ret;
1806 }
1807
1808 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1809 int mmu_idx, uintptr_t ra)
1810 {
1811 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1812 }
1813
1814 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1815 int mmu_idx, uintptr_t ra)
1816 {
1817 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1818 full_ldub_mmu);
1819 }
1820
1821 uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1822 int mmu_idx, uintptr_t ra)
1823 {
1824 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu);
1825 }
1826
1827 int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1828 int mmu_idx, uintptr_t ra)
1829 {
1830 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW,
1831 full_be_lduw_mmu);
1832 }
1833
1834 uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1835 int mmu_idx, uintptr_t ra)
1836 {
1837 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu);
1838 }
1839
1840 uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1841 int mmu_idx, uintptr_t ra)
1842 {
1843 return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu);
1844 }
1845
1846 uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1847 int mmu_idx, uintptr_t ra)
1848 {
1849 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu);
1850 }
1851
1852 int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1853 int mmu_idx, uintptr_t ra)
1854 {
1855 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW,
1856 full_le_lduw_mmu);
1857 }
1858
1859 uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1860 int mmu_idx, uintptr_t ra)
1861 {
1862 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu);
1863 }
1864
1865 uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1866 int mmu_idx, uintptr_t ra)
1867 {
1868 return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu);
1869 }
1870
1871 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1872 uintptr_t retaddr)
1873 {
1874 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1875 }
1876
1877 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1878 {
1879 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1880 }
1881
1882 uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr,
1883 uintptr_t retaddr)
1884 {
1885 return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1886 }
1887
1888 int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1889 {
1890 return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1891 }
1892
1893 uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr,
1894 uintptr_t retaddr)
1895 {
1896 return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1897 }
1898
1899 uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr,
1900 uintptr_t retaddr)
1901 {
1902 return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1903 }
1904
1905 uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr,
1906 uintptr_t retaddr)
1907 {
1908 return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1909 }
1910
1911 int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1912 {
1913 return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1914 }
1915
1916 uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr,
1917 uintptr_t retaddr)
1918 {
1919 return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1920 }
1921
1922 uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr,
1923 uintptr_t retaddr)
1924 {
1925 return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1926 }
1927
1928 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1929 {
1930 return cpu_ldub_data_ra(env, ptr, 0);
1931 }
1932
1933 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1934 {
1935 return cpu_ldsb_data_ra(env, ptr, 0);
1936 }
1937
1938 uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr)
1939 {
1940 return cpu_lduw_be_data_ra(env, ptr, 0);
1941 }
1942
1943 int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr)
1944 {
1945 return cpu_ldsw_be_data_ra(env, ptr, 0);
1946 }
1947
1948 uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr)
1949 {
1950 return cpu_ldl_be_data_ra(env, ptr, 0);
1951 }
1952
1953 uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr)
1954 {
1955 return cpu_ldq_be_data_ra(env, ptr, 0);
1956 }
1957
1958 uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr)
1959 {
1960 return cpu_lduw_le_data_ra(env, ptr, 0);
1961 }
1962
1963 int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr)
1964 {
1965 return cpu_ldsw_le_data_ra(env, ptr, 0);
1966 }
1967
1968 uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr)
1969 {
1970 return cpu_ldl_le_data_ra(env, ptr, 0);
1971 }
1972
1973 uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr)
1974 {
1975 return cpu_ldq_le_data_ra(env, ptr, 0);
1976 }
1977
1978 /*
1979 * Store Helpers
1980 */
1981
1982 static inline void QEMU_ALWAYS_INLINE
1983 store_memop(void *haddr, uint64_t val, MemOp op)
1984 {
1985 switch (op) {
1986 case MO_UB:
1987 stb_p(haddr, val);
1988 break;
1989 case MO_BEUW:
1990 stw_be_p(haddr, val);
1991 break;
1992 case MO_LEUW:
1993 stw_le_p(haddr, val);
1994 break;
1995 case MO_BEUL:
1996 stl_be_p(haddr, val);
1997 break;
1998 case MO_LEUL:
1999 stl_le_p(haddr, val);
2000 break;
2001 case MO_BEQ:
2002 stq_be_p(haddr, val);
2003 break;
2004 case MO_LEQ:
2005 stq_le_p(haddr, val);
2006 break;
2007 default:
2008 qemu_build_not_reached();
2009 }
2010 }
2011
/*
 * Common implementation of a guest store that goes through the TLB.
 *
 * @env:     CPU state whose TLB is consulted
 * @addr:    guest address to store to
 * @val:     value to store
 * @oi:      combined memop/mmu index; get_mmuidx() selects the TLB and
 *           get_memop() supplies the alignment requirement
 * @retaddr: passed unchanged to the unaligned/fill/watchpoint/IO callees
 * @op:      memop giving the access size (memop_size()) and byte order
 *
 * Order of operations:
 *  1. raise cpu_unaligned_access() if @addr violates the alignment bits;
 *  2. make sure the write TLB entry for @addr is present (victim TLB,
 *     then tlb_fill());
 *  3. if the entry carries flag bits below TARGET_PAGE_MASK, handle
 *     watchpoints, MMIO, discarded writes and not-dirty pages;
 *  4. if the access spans two pages (or is unaligned on a flagged page),
 *     split it into byte stores via helper_ret_stb_mmu();
 *  5. otherwise store directly through the host address.
 */
static inline void QEMU_ALWAYS_INLINE
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
{
    uintptr_t mmu_idx = get_mmuidx(oi);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
    target_ulong tlb_addr = tlb_addr_write(entry);
    const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
    unsigned a_bits = get_alignment_bits(get_memop(oi));
    void *haddr;
    size_t size = memop_size(op);

    /* Handle CPU specific unaligned behaviour */
    if (addr & ((1 << a_bits) - 1)) {
        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
                             mmu_idx, retaddr);
    }

    /* If the TLB entry is for a different page, reload and try again.  */
    if (!tlb_hit(tlb_addr, addr)) {
        if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
            addr & TARGET_PAGE_MASK)) {
            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
                     mmu_idx, retaddr);
            /* Recompute index and entry after the fill. */
            index = tlb_index(env, mmu_idx, addr);
            entry = tlb_entry(env, mmu_idx, addr);
        }
        /* Re-read the entry's write address, dropping TLB_INVALID_MASK. */
        tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
    }

    /* Handle anything that isn't just a straight memory access.  */
    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
        CPUIOTLBEntry *iotlbentry;
        bool need_swap;

        /* For anything that is unaligned, recurse through byte stores.  */
        if ((addr & (size - 1)) != 0) {
            goto do_unaligned_access;
        }

        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        /* Handle watchpoints.  */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            /* On watchpoint hit, this will longjmp out.  */
            cpu_check_watchpoint(env_cpu(env), addr, size,
                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
        }

        /* A byte swap only matters for accesses wider than one byte. */
        need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

        /* Handle I/O access.  */
        if (tlb_addr & TLB_MMIO) {
            /* Flip MO_BSWAP in op when need_swap is set. */
            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
                      op ^ (need_swap * MO_BSWAP));
            return;
        }

        /* Ignore writes to ROM.  */
        if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
            return;
        }

        /* Handle clean RAM pages.  */
        if (tlb_addr & TLB_NOTDIRTY) {
            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
        }

        haddr = (void *)((uintptr_t)addr + entry->addend);

        /*
         * Keep these two store_memop separate to ensure that the compiler
         * is able to fold the entire function to a single instruction.
         * There is a build-time assert inside to remind you of this.  ;-)
         */
        if (unlikely(need_swap)) {
            store_memop(haddr, val, op ^ MO_BSWAP);
        } else {
            store_memop(haddr, val, op);
        }
        return;
    }

    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                     >= TARGET_PAGE_SIZE)) {
        int i;
        uintptr_t index2;
        CPUTLBEntry *entry2;
        target_ulong page2, tlb_addr2;
        size_t size2;

    /* Also entered by goto from the flagged-page path above. */
    do_unaligned_access:
        /*
         * Ensure the second page is in the TLB.  Note that the first page
         * is already guaranteed to be filled, and that the second page
         * cannot evict the first.
         */
        /* page2/size2: page containing addr + size, and the offset of
           addr + size within that page. */
        page2 = (addr + size) & TARGET_PAGE_MASK;
        size2 = (addr + size) & ~TARGET_PAGE_MASK;
        index2 = tlb_index(env, mmu_idx, page2);
        entry2 = tlb_entry(env, mmu_idx, page2);
        tlb_addr2 = tlb_addr_write(entry2);
        if (!tlb_hit_page(tlb_addr2, page2)) {
            if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
                tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
                         mmu_idx, retaddr);
                index2 = tlb_index(env, mmu_idx, page2);
                entry2 = tlb_entry(env, mmu_idx, page2);
            }
            tlb_addr2 = tlb_addr_write(entry2);
        }

        /*
         * Handle watchpoints.  Since this may trap, all checks
         * must happen before any store.
         */
        if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), addr, size - size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
                                 BP_MEM_WRITE, retaddr);
        }
        if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
            cpu_check_watchpoint(env_cpu(env), page2, size2,
                                 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
                                 BP_MEM_WRITE, retaddr);
        }

        /*
         * XXX: not efficient, but simple.
         * This loop must go in the forward direction to avoid issues
         * with self-modifying code in Windows 64-bit.
         */
        /*
         * NOTE(review): each byte store below reuses the caller's oi, so
         * the callee derives its alignment bits from the original memop
         * rather than from a byte memop.  Confirm this cannot raise a
         * spurious unaligned access when the required alignment is
         * non-zero but smaller than the access size.
         */
        for (i = 0; i < size; ++i) {
            uint8_t val8;
            if (memop_big_endian(op)) {
                /* Big-endian extract.  */
                val8 = val >> (((size - 1) * 8) - (i * 8));
            } else {
                /* Little-endian extract.  */
                val8 = val >> (i * 8);
            }
            helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
        }
        return;
    }

    /*
     * No flag bits in the TLB entry and no page crossing: store directly
     * at the host address addr + entry->addend.
     */
    haddr = (void *)((uintptr_t)addr + entry->addend);
    store_memop(haddr, val, op);
}
2164
2165 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2166 TCGMemOpIdx oi, uintptr_t retaddr)
2167 {
2168 store_helper(env, addr, val, oi, retaddr, MO_UB);
2169 }
2170
2171 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2172 TCGMemOpIdx oi, uintptr_t retaddr)
2173 {
2174 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2175 }
2176
2177 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2178 TCGMemOpIdx oi, uintptr_t retaddr)
2179 {
2180 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2181 }
2182
2183 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2184 TCGMemOpIdx oi, uintptr_t retaddr)
2185 {
2186 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2187 }
2188
2189 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2190 TCGMemOpIdx oi, uintptr_t retaddr)
2191 {
2192 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2193 }
2194
2195 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2196 TCGMemOpIdx oi, uintptr_t retaddr)
2197 {
2198 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2199 }
2200
2201 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2202 TCGMemOpIdx oi, uintptr_t retaddr)
2203 {
2204 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2205 }
2206
2207 /*
2208 * Store Helpers for cpu_ldst.h
2209 */
2210
2211 static inline void QEMU_ALWAYS_INLINE
2212 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2213 int mmu_idx, uintptr_t retaddr, MemOp op)
2214 {
2215 TCGMemOpIdx oi;
2216 uint16_t meminfo;
2217
2218 meminfo = trace_mem_get_info(op, mmu_idx, true);
2219 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2220
2221 oi = make_memop_idx(op, mmu_idx);
2222 store_helper(env, addr, val, oi, retaddr, op);
2223
2224 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2225 }
2226
2227 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2228 int mmu_idx, uintptr_t retaddr)
2229 {
2230 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2231 }
2232
2233 void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2234 int mmu_idx, uintptr_t retaddr)
2235 {
2236 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW);
2237 }
2238
2239 void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2240 int mmu_idx, uintptr_t retaddr)
2241 {
2242 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL);
2243 }
2244
2245 void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2246 int mmu_idx, uintptr_t retaddr)
2247 {
2248 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ);
2249 }
2250
2251 void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2252 int mmu_idx, uintptr_t retaddr)
2253 {
2254 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW);
2255 }
2256
2257 void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2258 int mmu_idx, uintptr_t retaddr)
2259 {
2260 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL);
2261 }
2262
2263 void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2264 int mmu_idx, uintptr_t retaddr)
2265 {
2266 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ);
2267 }
2268
2269 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2270 uint32_t val, uintptr_t retaddr)
2271 {
2272 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2273 }
2274
2275 void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr,
2276 uint32_t val, uintptr_t retaddr)
2277 {
2278 cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2279 }
2280
2281 void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr,
2282 uint32_t val, uintptr_t retaddr)
2283 {
2284 cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2285 }
2286
2287 void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr,
2288 uint64_t val, uintptr_t retaddr)
2289 {
2290 cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2291 }
2292
2293 void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr,
2294 uint32_t val, uintptr_t retaddr)
2295 {
2296 cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2297 }
2298
2299 void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr,
2300 uint32_t val, uintptr_t retaddr)
2301 {
2302 cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2303 }
2304
2305 void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr,
2306 uint64_t val, uintptr_t retaddr)
2307 {
2308 cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2309 }
2310
2311 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2312 {
2313 cpu_stb_data_ra(env, ptr, val, 0);
2314 }
2315
2316 void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2317 {
2318 cpu_stw_be_data_ra(env, ptr, val, 0);
2319 }
2320
2321 void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2322 {
2323 cpu_stl_be_data_ra(env, ptr, val, 0);
2324 }
2325
2326 void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2327 {
2328 cpu_stq_be_data_ra(env, ptr, val, 0);
2329 }
2330
2331 void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2332 {
2333 cpu_stw_le_data_ra(env, ptr, val, 0);
2334 }
2335
2336 void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2337 {
2338 cpu_stl_le_data_ra(env, ptr, val, 0);
2339 }
2340
2341 void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2342 {
2343 cpu_stq_le_data_ra(env, ptr, val, 0);
2344 }
2345
/*
 * First set of helpers allows passing in of OI and RETADDR.  This makes
 * them callable from other helpers.
 *
 * The macros below parameterize atomic_template.h, which is included once
 * per DATA_SIZE.  Presumably the template consumes (and undefines)
 * DATA_SIZE on each inclusion -- confirm against atomic_template.h.
 */

/* Extra parameters for this set: note the leading comma. */
#define EXTRA_ARGS     , TCGMemOpIdx oi, uintptr_t retaddr
/* Names in this set are HELPER(atomic_<X><SUFFIX><END>_mmu). */
#define ATOMIC_NAME(X) \
    HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
/* Defined empty here. */
#define ATOMIC_MMU_DECLS
/* The lookup uses the retaddr supplied by the caller. */
#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
/* Defined empty here. */
#define ATOMIC_MMU_CLEANUP
#define ATOMIC_MMU_IDX   get_mmuidx(oi)

#include "atomic_common.c.inc"

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* DATA_SIZE 8 is instantiated only when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* DATA_SIZE 16 needs either HAVE_CMPXCHG128 or HAVE_ATOMIC128. */
#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif

/* Second set of helpers are directly callable from TCG as helpers.  */

/*
 * Only EXTRA_ARGS, ATOMIC_NAME and ATOMIC_MMU_LOOKUP are redefined;
 * ATOMIC_MMU_DECLS, ATOMIC_MMU_CLEANUP and ATOMIC_MMU_IDX keep the
 * definitions given above.  In this set there is no retaddr parameter
 * and no "_mmu" name suffix; the lookup passes GETPC() instead.
 */
#undef EXTRA_ARGS
#undef ATOMIC_NAME
#undef ATOMIC_MMU_LOOKUP
#define EXTRA_ARGS         , TCGMemOpIdx oi
#define ATOMIC_NAME(X)     HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
#define ATOMIC_MMU_LOOKUP  atomic_mmu_lookup(env, addr, oi, GETPC())

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* As above, DATA_SIZE 8 depends on CONFIG_ATOMIC64. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
/* This set has no DATA_SIZE 16 instantiation. */
#undef ATOMIC_MMU_IDX
2401
2402 /* Code access functions. */
2403
2404 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2405 TCGMemOpIdx oi, uintptr_t retaddr)
2406 {
2407 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2408 }
2409
2410 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2411 {
2412 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2413 return full_ldub_code(env, addr, oi, 0);
2414 }
2415
2416 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2417 TCGMemOpIdx oi, uintptr_t retaddr)
2418 {
2419 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2420 }
2421
2422 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2423 {
2424 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2425 return full_lduw_code(env, addr, oi, 0);
2426 }
2427
2428 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2429 TCGMemOpIdx oi, uintptr_t retaddr)
2430 {
2431 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2432 }
2433
2434 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2435 {
2436 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2437 return full_ldl_code(env, addr, oi, 0);
2438 }
2439
2440 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2441 TCGMemOpIdx oi, uintptr_t retaddr)
2442 {
2443 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2444 }
2445
2446 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2447 {
2448 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2449 return full_ldq_code(env, addr, oi, 0);
2450 }