Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging
[qemu.git] / accel / tcg / cputlb.c
1 /*
2 * Common CPU TLB handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "cpu.h"
23 #include "exec/exec-all.h"
24 #include "exec/memory.h"
25 #include "exec/address-spaces.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/memory-internal.h"
29 #include "exec/ram_addr.h"
30 #include "tcg/tcg.h"
31 #include "qemu/error-report.h"
32 #include "exec/log.h"
33 #include "exec/helper-proto.h"
34 #include "qemu/atomic.h"
35 #include "qemu/atomic128.h"
36 #include "translate-all.h"
37 #include "trace-root.h"
38 #include "trace/mem.h"
39 #ifdef CONFIG_PLUGIN
40 #include "qemu/plugin-memory.h"
41 #endif
42
/*
 * Debug switches.  Uncomment DEBUG_TLB to turn on the debug gate; also
 * uncomment DEBUG_TLB_LOG to turn on the log gate, which selects the
 * CPU_LOG_MMU log target for the debug output.
 */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#if defined(DEBUG_TLB)
# define DEBUG_TLB_GATE 1
#else
# define DEBUG_TLB_GATE 0
#endif

/* DEBUG_TLB_LOG only has an effect when DEBUG_TLB is defined as well. */
#if defined(DEBUG_TLB) && defined(DEBUG_TLB_LOG)
# define DEBUG_TLB_LOG_GATE 1
#else
# define DEBUG_TLB_LOG_GATE 0
#endif
58
/*
 * tlb_debug: printf-style trace, prefixed with the name of the calling
 * function.  The message goes to the CPU_LOG_MMU log when DEBUG_TLB_LOG_GATE
 * is non-zero, to stderr when only DEBUG_TLB_GATE is non-zero, and is
 * dropped otherwise.
 */
#define tlb_debug(fmt, ...)                                         \
    do {                                                            \
        if (DEBUG_TLB_LOG_GATE) {                                   \
            qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__,        \
                          ## __VA_ARGS__);                          \
        } else if (DEBUG_TLB_GATE) {                                \
            fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__);  \
        }                                                           \
    } while (0)
67
/*
 * assert_cpu_is_self: when DEBUG_TLB_GATE is non-zero, assert that @cpu has
 * either not been created yet or is the cpu of the calling thread.
 */
#define assert_cpu_is_self(cpu)                                     \
    do {                                                            \
        if (DEBUG_TLB_GATE) {                                       \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));     \
        }                                                           \
    } while (0)
73
74 /* run_on_cpu_data.target_ptr should always be big enough for a
75 * target_ulong even on 32 bit builds */
76 QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
77
78 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
79 */
80 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
81 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
82
83 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
84 {
85 return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
86 }
87
88 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
89 {
90 return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
91 }
92
93 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
94 size_t max_entries)
95 {
96 desc->window_begin_ns = ns;
97 desc->window_max_entries = max_entries;
98 }
99
100 /**
101 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
102 * @desc: The CPUTLBDesc portion of the TLB
103 * @fast: The CPUTLBDescFast portion of the same TLB
104 *
105 * Called with tlb_lock_held.
106 *
107 * We have two main constraints when resizing a TLB: (1) we only resize it
108 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
109 * the array or unnecessarily flushing it), which means we do not control how
110 * frequently the resizing can occur; (2) we don't have access to the guest's
111 * future scheduling decisions, and therefore have to decide the magnitude of
112 * the resize based on past observations.
113 *
114 * In general, a memory-hungry process can benefit greatly from an appropriately
115 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
116 * we just have to make the TLB as large as possible; while an oversized TLB
117 * results in minimal TLB miss rates, it also takes longer to be flushed
118 * (flushes can be _very_ frequent), and the reduced locality can also hurt
119 * performance.
120 *
121 * To achieve near-optimal performance for all kinds of workloads, we:
122 *
123 * 1. Aggressively increase the size of the TLB when the use rate of the
124 * TLB being flushed is high, since it is likely that in the near future this
125 * memory-hungry process will execute again, and its memory hungriness will
126 * probably be similar.
127 *
128 * 2. Slowly reduce the size of the TLB as the use rate declines over a
129 * reasonably large time window. The rationale is that if in such a time window
130 * we have not observed a high TLB use rate, it is likely that we won't observe
131 * it in the near future. In that case, once a time window expires we downsize
132 * the TLB to match the maximum use rate observed in the window.
133 *
134 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
135 * since in that range performance is likely near-optimal. Recall that the TLB
136 * is direct mapped, so we want the use rate to be low (or at least not too
137 * high), since otherwise we are likely to have a significant amount of
138 * conflict misses.
139 */
140 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
141 int64_t now)
142 {
143 size_t old_size = tlb_n_entries(fast);
144 size_t rate;
145 size_t new_size = old_size;
146 int64_t window_len_ms = 100;
147 int64_t window_len_ns = window_len_ms * 1000 * 1000;
148 bool window_expired = now > desc->window_begin_ns + window_len_ns;
149
150 if (desc->n_used_entries > desc->window_max_entries) {
151 desc->window_max_entries = desc->n_used_entries;
152 }
153 rate = desc->window_max_entries * 100 / old_size;
154
155 if (rate > 70) {
156 new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
157 } else if (rate < 30 && window_expired) {
158 size_t ceil = pow2ceil(desc->window_max_entries);
159 size_t expected_rate = desc->window_max_entries * 100 / ceil;
160
161 /*
162 * Avoid undersizing when the max number of entries seen is just below
163 * a pow2. For instance, if max_entries == 1025, the expected use rate
164 * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
165 * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
166 * later. Thus, make sure that the expected use rate remains below 70%.
167 * (and since we double the size, that means the lowest rate we'd
168 * expect to get is 35%, which is still in the 30-70% range where
169 * we consider that the size is appropriate.)
170 */
171 if (expected_rate > 70) {
172 ceil *= 2;
173 }
174 new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
175 }
176
177 if (new_size == old_size) {
178 if (window_expired) {
179 tlb_window_reset(desc, now, desc->n_used_entries);
180 }
181 return;
182 }
183
184 g_free(fast->table);
185 g_free(desc->iotlb);
186
187 tlb_window_reset(desc, now, 0);
188 /* desc->n_used_entries is cleared by the caller */
189 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
190 fast->table = g_try_new(CPUTLBEntry, new_size);
191 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
192
193 /*
194 * If the allocations fail, try smaller sizes. We just freed some
195 * memory, so going back to half of new_size has a good chance of working.
196 * Increased memory pressure elsewhere in the system might cause the
197 * allocations to fail though, so we progressively reduce the allocation
198 * size, aborting if we cannot even allocate the smallest TLB we support.
199 */
200 while (fast->table == NULL || desc->iotlb == NULL) {
201 if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
202 error_report("%s: %s", __func__, strerror(errno));
203 abort();
204 }
205 new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
206 fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
207
208 g_free(fast->table);
209 g_free(desc->iotlb);
210 fast->table = g_try_new(CPUTLBEntry, new_size);
211 desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
212 }
213 }
214
215 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
216 {
217 desc->n_used_entries = 0;
218 desc->large_page_addr = -1;
219 desc->large_page_mask = -1;
220 desc->vindex = 0;
221 memset(fast->table, -1, sizeof_tlb(fast));
222 memset(desc->vtable, -1, sizeof(desc->vtable));
223 }
224
225 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
226 int64_t now)
227 {
228 CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
229 CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
230
231 tlb_mmu_resize_locked(desc, fast, now);
232 tlb_mmu_flush_locked(desc, fast);
233 }
234
235 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
236 {
237 size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
238
239 tlb_window_reset(desc, now, 0);
240 desc->n_used_entries = 0;
241 fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
242 fast->table = g_new(CPUTLBEntry, n_entries);
243 desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
244 tlb_mmu_flush_locked(desc, fast);
245 }
246
247 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
248 {
249 env_tlb(env)->d[mmu_idx].n_used_entries++;
250 }
251
252 static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
253 {
254 env_tlb(env)->d[mmu_idx].n_used_entries--;
255 }
256
257 void tlb_init(CPUState *cpu)
258 {
259 CPUArchState *env = cpu->env_ptr;
260 int64_t now = get_clock_realtime();
261 int i;
262
263 qemu_spin_init(&env_tlb(env)->c.lock);
264
265 /* All tlbs are initialized flushed. */
266 env_tlb(env)->c.dirty = 0;
267
268 for (i = 0; i < NB_MMU_MODES; i++) {
269 tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
270 }
271 }
272
273 /* flush_all_helper: run fn across all cpus
274 *
275 * If the wait flag is set then the src cpu's helper will be queued as
276 * "safe" work and the loop exited creating a synchronisation point
277 * where all queued work will be finished before execution starts
278 * again.
279 */
280 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
281 run_on_cpu_data d)
282 {
283 CPUState *cpu;
284
285 CPU_FOREACH(cpu) {
286 if (cpu != src) {
287 async_run_on_cpu(cpu, fn, d);
288 }
289 }
290 }
291
292 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
293 {
294 CPUState *cpu;
295 size_t full = 0, part = 0, elide = 0;
296
297 CPU_FOREACH(cpu) {
298 CPUArchState *env = cpu->env_ptr;
299
300 full += atomic_read(&env_tlb(env)->c.full_flush_count);
301 part += atomic_read(&env_tlb(env)->c.part_flush_count);
302 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
303 }
304 *pfull = full;
305 *ppart = part;
306 *pelide = elide;
307 }
308
309 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
310 {
311 CPUArchState *env = cpu->env_ptr;
312 uint16_t asked = data.host_int;
313 uint16_t all_dirty, work, to_clean;
314 int64_t now = get_clock_realtime();
315
316 assert_cpu_is_self(cpu);
317
318 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
319
320 qemu_spin_lock(&env_tlb(env)->c.lock);
321
322 all_dirty = env_tlb(env)->c.dirty;
323 to_clean = asked & all_dirty;
324 all_dirty &= ~to_clean;
325 env_tlb(env)->c.dirty = all_dirty;
326
327 for (work = to_clean; work != 0; work &= work - 1) {
328 int mmu_idx = ctz32(work);
329 tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
330 }
331
332 qemu_spin_unlock(&env_tlb(env)->c.lock);
333
334 cpu_tb_jmp_cache_clear(cpu);
335
336 if (to_clean == ALL_MMUIDX_BITS) {
337 atomic_set(&env_tlb(env)->c.full_flush_count,
338 env_tlb(env)->c.full_flush_count + 1);
339 } else {
340 atomic_set(&env_tlb(env)->c.part_flush_count,
341 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
342 if (to_clean != asked) {
343 atomic_set(&env_tlb(env)->c.elide_flush_count,
344 env_tlb(env)->c.elide_flush_count +
345 ctpop16(asked & ~to_clean));
346 }
347 }
348 }
349
350 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
351 {
352 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
353
354 if (cpu->created && !qemu_cpu_is_self(cpu)) {
355 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
356 RUN_ON_CPU_HOST_INT(idxmap));
357 } else {
358 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
359 }
360 }
361
362 void tlb_flush(CPUState *cpu)
363 {
364 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
365 }
366
367 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
368 {
369 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
370
371 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
372
373 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
374 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
375 }
376
377 void tlb_flush_all_cpus(CPUState *src_cpu)
378 {
379 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
380 }
381
382 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
383 {
384 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
385
386 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
387
388 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
389 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
390 }
391
392 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
393 {
394 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
395 }
396
397 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
398 target_ulong page)
399 {
400 return tlb_hit_page(tlb_entry->addr_read, page) ||
401 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
402 tlb_hit_page(tlb_entry->addr_code, page);
403 }
404
405 /**
406 * tlb_entry_is_empty - return true if the entry is not in use
407 * @te: pointer to CPUTLBEntry
408 */
409 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
410 {
411 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
412 }
413
414 /* Called with tlb_c.lock held */
415 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
416 target_ulong page)
417 {
418 if (tlb_hit_page_anyprot(tlb_entry, page)) {
419 memset(tlb_entry, -1, sizeof(*tlb_entry));
420 return true;
421 }
422 return false;
423 }
424
425 /* Called with tlb_c.lock held */
426 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
427 target_ulong page)
428 {
429 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
430 int k;
431
432 assert_cpu_is_self(env_cpu(env));
433 for (k = 0; k < CPU_VTLB_SIZE; k++) {
434 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
435 tlb_n_used_entries_dec(env, mmu_idx);
436 }
437 }
438 }
439
440 static void tlb_flush_page_locked(CPUArchState *env, int midx,
441 target_ulong page)
442 {
443 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
444 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
445
446 /* Check if we need to flush due to large pages. */
447 if ((page & lp_mask) == lp_addr) {
448 tlb_debug("forcing full flush midx %d ("
449 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
450 midx, lp_addr, lp_mask);
451 tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
452 } else {
453 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
454 tlb_n_used_entries_dec(env, midx);
455 }
456 tlb_flush_vtlb_page_locked(env, midx, page);
457 }
458 }
459
460 /**
461 * tlb_flush_page_by_mmuidx_async_0:
462 * @cpu: cpu on which to flush
463 * @addr: page of virtual address to flush
464 * @idxmap: set of mmu_idx to flush
465 *
466 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
467 * at @addr from the tlbs indicated by @idxmap from @cpu.
468 */
469 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
470 target_ulong addr,
471 uint16_t idxmap)
472 {
473 CPUArchState *env = cpu->env_ptr;
474 int mmu_idx;
475
476 assert_cpu_is_self(cpu);
477
478 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
479
480 qemu_spin_lock(&env_tlb(env)->c.lock);
481 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
482 if ((idxmap >> mmu_idx) & 1) {
483 tlb_flush_page_locked(env, mmu_idx, addr);
484 }
485 }
486 qemu_spin_unlock(&env_tlb(env)->c.lock);
487
488 tb_flush_jmp_cache(cpu, addr);
489 }
490
491 /**
492 * tlb_flush_page_by_mmuidx_async_1:
493 * @cpu: cpu on which to flush
494 * @data: encoded addr + idxmap
495 *
496 * Helper for tlb_flush_page_by_mmuidx and friends, called through
497 * async_run_on_cpu. The idxmap parameter is encoded in the page
498 * offset of the target_ptr field. This limits the set of mmu_idx
499 * that can be passed via this method.
500 */
501 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
502 run_on_cpu_data data)
503 {
504 target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
505 target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
506 uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
507
508 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
509 }
510
511 typedef struct {
512 target_ulong addr;
513 uint16_t idxmap;
514 } TLBFlushPageByMMUIdxData;
515
516 /**
517 * tlb_flush_page_by_mmuidx_async_2:
518 * @cpu: cpu on which to flush
519 * @data: allocated addr + idxmap
520 *
521 * Helper for tlb_flush_page_by_mmuidx and friends, called through
522 * async_run_on_cpu. The addr+idxmap parameters are stored in a
523 * TLBFlushPageByMMUIdxData structure that has been allocated
524 * specifically for this helper. Free the structure when done.
525 */
526 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
527 run_on_cpu_data data)
528 {
529 TLBFlushPageByMMUIdxData *d = data.host_ptr;
530
531 tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
532 g_free(d);
533 }
534
535 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
536 {
537 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
538
539 /* This should already be page aligned */
540 addr &= TARGET_PAGE_MASK;
541
542 if (qemu_cpu_is_self(cpu)) {
543 tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
544 } else if (idxmap < TARGET_PAGE_SIZE) {
545 /*
546 * Most targets have only a few mmu_idx. In the case where
547 * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
548 * allocating memory for this operation.
549 */
550 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
551 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
552 } else {
553 TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
554
555 /* Otherwise allocate a structure, freed by the worker. */
556 d->addr = addr;
557 d->idxmap = idxmap;
558 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
559 RUN_ON_CPU_HOST_PTR(d));
560 }
561 }
562
563 void tlb_flush_page(CPUState *cpu, target_ulong addr)
564 {
565 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
566 }
567
568 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
569 uint16_t idxmap)
570 {
571 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
572
573 /* This should already be page aligned */
574 addr &= TARGET_PAGE_MASK;
575
576 /*
577 * Allocate memory to hold addr+idxmap only when needed.
578 * See tlb_flush_page_by_mmuidx for details.
579 */
580 if (idxmap < TARGET_PAGE_SIZE) {
581 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
582 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
583 } else {
584 CPUState *dst_cpu;
585
586 /* Allocate a separate data block for each destination cpu. */
587 CPU_FOREACH(dst_cpu) {
588 if (dst_cpu != src_cpu) {
589 TLBFlushPageByMMUIdxData *d
590 = g_new(TLBFlushPageByMMUIdxData, 1);
591
592 d->addr = addr;
593 d->idxmap = idxmap;
594 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
595 RUN_ON_CPU_HOST_PTR(d));
596 }
597 }
598 }
599
600 tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
601 }
602
603 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
604 {
605 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
606 }
607
608 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
609 target_ulong addr,
610 uint16_t idxmap)
611 {
612 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
613
614 /* This should already be page aligned */
615 addr &= TARGET_PAGE_MASK;
616
617 /*
618 * Allocate memory to hold addr+idxmap only when needed.
619 * See tlb_flush_page_by_mmuidx for details.
620 */
621 if (idxmap < TARGET_PAGE_SIZE) {
622 flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
623 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
624 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
625 RUN_ON_CPU_TARGET_PTR(addr | idxmap));
626 } else {
627 CPUState *dst_cpu;
628 TLBFlushPageByMMUIdxData *d;
629
630 /* Allocate a separate data block for each destination cpu. */
631 CPU_FOREACH(dst_cpu) {
632 if (dst_cpu != src_cpu) {
633 d = g_new(TLBFlushPageByMMUIdxData, 1);
634 d->addr = addr;
635 d->idxmap = idxmap;
636 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
637 RUN_ON_CPU_HOST_PTR(d));
638 }
639 }
640
641 d = g_new(TLBFlushPageByMMUIdxData, 1);
642 d->addr = addr;
643 d->idxmap = idxmap;
644 async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
645 RUN_ON_CPU_HOST_PTR(d));
646 }
647 }
648
649 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
650 {
651 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
652 }
653
654 /* update the TLBs so that writes to code in the virtual page 'addr'
655 can be detected */
656 void tlb_protect_code(ram_addr_t ram_addr)
657 {
658 cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
659 DIRTY_MEMORY_CODE);
660 }
661
662 /* update the TLB so that writes in physical page 'phys_addr' are no longer
663 tested for self modifying code */
664 void tlb_unprotect_code(ram_addr_t ram_addr)
665 {
666 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
667 }
668
669
670 /*
671 * Dirty write flag handling
672 *
673 * When the TCG code writes to a location it looks up the address in
674 * the TLB and uses that data to compute the final address. If any of
675 * the lower bits of the address are set then the slow path is forced.
676 * There are a number of reasons to do this but for normal RAM the
677 * most usual is detecting writes to code regions which may invalidate
678 * generated code.
679 *
680 * Other vCPUs might be reading their TLBs during guest execution, so we update
681 * te->addr_write with atomic_set. We don't need to worry about this for
682 * oversized guests as MTTCG is disabled for them.
683 *
684 * Called with tlb_c.lock held.
685 */
686 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
687 uintptr_t start, uintptr_t length)
688 {
689 uintptr_t addr = tlb_entry->addr_write;
690
691 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
692 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
693 addr &= TARGET_PAGE_MASK;
694 addr += tlb_entry->addend;
695 if ((addr - start) < length) {
696 #if TCG_OVERSIZED_GUEST
697 tlb_entry->addr_write |= TLB_NOTDIRTY;
698 #else
699 atomic_set(&tlb_entry->addr_write,
700 tlb_entry->addr_write | TLB_NOTDIRTY);
701 #endif
702 }
703 }
704 }
705
706 /*
707 * Called with tlb_c.lock held.
708 * Called only from the vCPU context, i.e. the TLB's owner thread.
709 */
710 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
711 {
712 *d = *s;
713 }
714
715 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
716 * the target vCPU).
717 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
718 * thing actually updated is the target TLB entry ->addr_write flags.
719 */
720 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
721 {
722 CPUArchState *env;
723
724 int mmu_idx;
725
726 env = cpu->env_ptr;
727 qemu_spin_lock(&env_tlb(env)->c.lock);
728 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
729 unsigned int i;
730 unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
731
732 for (i = 0; i < n; i++) {
733 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
734 start1, length);
735 }
736
737 for (i = 0; i < CPU_VTLB_SIZE; i++) {
738 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
739 start1, length);
740 }
741 }
742 qemu_spin_unlock(&env_tlb(env)->c.lock);
743 }
744
745 /* Called with tlb_c.lock held */
746 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
747 target_ulong vaddr)
748 {
749 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
750 tlb_entry->addr_write = vaddr;
751 }
752 }
753
754 /* update the TLB corresponding to virtual page vaddr
755 so that it is no longer dirty */
756 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
757 {
758 CPUArchState *env = cpu->env_ptr;
759 int mmu_idx;
760
761 assert_cpu_is_self(cpu);
762
763 vaddr &= TARGET_PAGE_MASK;
764 qemu_spin_lock(&env_tlb(env)->c.lock);
765 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
766 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
767 }
768
769 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
770 int k;
771 for (k = 0; k < CPU_VTLB_SIZE; k++) {
772 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
773 }
774 }
775 qemu_spin_unlock(&env_tlb(env)->c.lock);
776 }
777
778 /* Our TLB does not support large pages, so remember the area covered by
779 large pages and trigger a full TLB flush if these are invalidated. */
780 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
781 target_ulong vaddr, target_ulong size)
782 {
783 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
784 target_ulong lp_mask = ~(size - 1);
785
786 if (lp_addr == (target_ulong)-1) {
787 /* No previous large page. */
788 lp_addr = vaddr;
789 } else {
790 /* Extend the existing region to include the new page.
791 This is a compromise between unnecessary flushes and
792 the cost of maintaining a full variable size TLB. */
793 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
794 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
795 lp_mask <<= 1;
796 }
797 }
798 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
799 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
800 }
801
802 /* Add a new TLB entry. At most one entry for a given virtual address
803 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
804 * supplied size is only used by tlb_flush_page.
805 *
806 * Called from TCG-generated code, which is under an RCU read-side
807 * critical section.
808 */
809 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
810 hwaddr paddr, MemTxAttrs attrs, int prot,
811 int mmu_idx, target_ulong size)
812 {
813 CPUArchState *env = cpu->env_ptr;
814 CPUTLB *tlb = env_tlb(env);
815 CPUTLBDesc *desc = &tlb->d[mmu_idx];
816 MemoryRegionSection *section;
817 unsigned int index;
818 target_ulong address;
819 target_ulong write_address;
820 uintptr_t addend;
821 CPUTLBEntry *te, tn;
822 hwaddr iotlb, xlat, sz, paddr_page;
823 target_ulong vaddr_page;
824 int asidx = cpu_asidx_from_attrs(cpu, attrs);
825 int wp_flags;
826 bool is_ram, is_romd;
827
828 assert_cpu_is_self(cpu);
829
830 if (size <= TARGET_PAGE_SIZE) {
831 sz = TARGET_PAGE_SIZE;
832 } else {
833 tlb_add_large_page(env, mmu_idx, vaddr, size);
834 sz = size;
835 }
836 vaddr_page = vaddr & TARGET_PAGE_MASK;
837 paddr_page = paddr & TARGET_PAGE_MASK;
838
839 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
840 &xlat, &sz, attrs, &prot);
841 assert(sz >= TARGET_PAGE_SIZE);
842
843 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
844 " prot=%x idx=%d\n",
845 vaddr, paddr, prot, mmu_idx);
846
847 address = vaddr_page;
848 if (size < TARGET_PAGE_SIZE) {
849 /* Repeat the MMU check and TLB fill on every access. */
850 address |= TLB_INVALID_MASK;
851 }
852 if (attrs.byte_swap) {
853 address |= TLB_BSWAP;
854 }
855
856 is_ram = memory_region_is_ram(section->mr);
857 is_romd = memory_region_is_romd(section->mr);
858
859 if (is_ram || is_romd) {
860 /* RAM and ROMD both have associated host memory. */
861 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
862 } else {
863 /* I/O does not; force the host address to NULL. */
864 addend = 0;
865 }
866
867 write_address = address;
868 if (is_ram) {
869 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
870 /*
871 * Computing is_clean is expensive; avoid all that unless
872 * the page is actually writable.
873 */
874 if (prot & PAGE_WRITE) {
875 if (section->readonly) {
876 write_address |= TLB_DISCARD_WRITE;
877 } else if (cpu_physical_memory_is_clean(iotlb)) {
878 write_address |= TLB_NOTDIRTY;
879 }
880 }
881 } else {
882 /* I/O or ROMD */
883 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
884 /*
885 * Writes to romd devices must go through MMIO to enable write.
886 * Reads to romd devices go through the ram_ptr found above,
887 * but of course reads to I/O must go through MMIO.
888 */
889 write_address |= TLB_MMIO;
890 if (!is_romd) {
891 address = write_address;
892 }
893 }
894
895 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
896 TARGET_PAGE_SIZE);
897
898 index = tlb_index(env, mmu_idx, vaddr_page);
899 te = tlb_entry(env, mmu_idx, vaddr_page);
900
901 /*
902 * Hold the TLB lock for the rest of the function. We could acquire/release
903 * the lock several times in the function, but it is faster to amortize the
904 * acquisition cost by acquiring it just once. Note that this leads to
905 * a longer critical section, but this is not a concern since the TLB lock
906 * is unlikely to be contended.
907 */
908 qemu_spin_lock(&tlb->c.lock);
909
910 /* Note that the tlb is no longer clean. */
911 tlb->c.dirty |= 1 << mmu_idx;
912
913 /* Make sure there's no cached translation for the new page. */
914 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
915
916 /*
917 * Only evict the old entry to the victim tlb if it's for a
918 * different page; otherwise just overwrite the stale data.
919 */
920 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
921 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
922 CPUTLBEntry *tv = &desc->vtable[vidx];
923
924 /* Evict the old entry into the victim tlb. */
925 copy_tlb_helper_locked(tv, te);
926 desc->viotlb[vidx] = desc->iotlb[index];
927 tlb_n_used_entries_dec(env, mmu_idx);
928 }
929
930 /* refill the tlb */
931 /*
932 * At this point iotlb contains a physical section number in the lower
933 * TARGET_PAGE_BITS, and either
934 * + the ram_addr_t of the page base of the target RAM (RAM)
935 * + the offset within section->mr of the page base (I/O, ROMD)
936 * We subtract the vaddr_page (which is page aligned and thus won't
937 * disturb the low bits) to give an offset which can be added to the
938 * (non-page-aligned) vaddr of the eventual memory access to get
939 * the MemoryRegion offset for the access. Note that the vaddr we
940 * subtract here is that of the page base, and not the same as the
941 * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
942 */
943 desc->iotlb[index].addr = iotlb - vaddr_page;
944 desc->iotlb[index].attrs = attrs;
945
946 /* Now calculate the new entry */
947 tn.addend = addend - vaddr_page;
948 if (prot & PAGE_READ) {
949 tn.addr_read = address;
950 if (wp_flags & BP_MEM_READ) {
951 tn.addr_read |= TLB_WATCHPOINT;
952 }
953 } else {
954 tn.addr_read = -1;
955 }
956
957 if (prot & PAGE_EXEC) {
958 tn.addr_code = address;
959 } else {
960 tn.addr_code = -1;
961 }
962
963 tn.addr_write = -1;
964 if (prot & PAGE_WRITE) {
965 tn.addr_write = write_address;
966 if (prot & PAGE_WRITE_INV) {
967 tn.addr_write |= TLB_INVALID_MASK;
968 }
969 if (wp_flags & BP_MEM_WRITE) {
970 tn.addr_write |= TLB_WATCHPOINT;
971 }
972 }
973
974 copy_tlb_helper_locked(te, &tn);
975 tlb_n_used_entries_inc(env, mmu_idx);
976 qemu_spin_unlock(&tlb->c.lock);
977 }
978
979 /* Add a new TLB entry, but without specifying the memory
980 * transaction attributes to be used.
981 */
982 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
983 hwaddr paddr, int prot,
984 int mmu_idx, target_ulong size)
985 {
986 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
987 prot, mmu_idx, size);
988 }
989
990 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
991 {
992 ram_addr_t ram_addr;
993
994 ram_addr = qemu_ram_addr_from_host(ptr);
995 if (ram_addr == RAM_ADDR_INVALID) {
996 error_report("Bad ram pointer %p", ptr);
997 abort();
998 }
999 return ram_addr;
1000 }
1001
1002 /*
1003 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
1004 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
1005 * be discarded and looked up again (e.g. via tlb_entry()).
1006 */
1007 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
1008 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1009 {
1010 CPUClass *cc = CPU_GET_CLASS(cpu);
1011 bool ok;
1012
1013 /*
1014 * This is not a probe, so only valid return is success; failure
1015 * should result in exception + longjmp to the cpu loop.
1016 */
1017 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
1018 assert(ok);
1019 }
1020
1021 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1022 int mmu_idx, target_ulong addr, uintptr_t retaddr,
1023 MMUAccessType access_type, MemOp op)
1024 {
1025 CPUState *cpu = env_cpu(env);
1026 hwaddr mr_offset;
1027 MemoryRegionSection *section;
1028 MemoryRegion *mr;
1029 uint64_t val;
1030 bool locked = false;
1031 MemTxResult r;
1032
1033 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1034 mr = section->mr;
1035 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1036 cpu->mem_io_pc = retaddr;
1037 if (!cpu->can_do_io) {
1038 cpu_io_recompile(cpu, retaddr);
1039 }
1040
1041 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1042 qemu_mutex_lock_iothread();
1043 locked = true;
1044 }
1045 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
1046 if (r != MEMTX_OK) {
1047 hwaddr physaddr = mr_offset +
1048 section->offset_within_address_space -
1049 section->offset_within_region;
1050
1051 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
1052 mmu_idx, iotlbentry->attrs, r, retaddr);
1053 }
1054 if (locked) {
1055 qemu_mutex_unlock_iothread();
1056 }
1057
1058 return val;
1059 }
1060
1061 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
1062 int mmu_idx, uint64_t val, target_ulong addr,
1063 uintptr_t retaddr, MemOp op)
1064 {
1065 CPUState *cpu = env_cpu(env);
1066 hwaddr mr_offset;
1067 MemoryRegionSection *section;
1068 MemoryRegion *mr;
1069 bool locked = false;
1070 MemTxResult r;
1071
1072 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
1073 mr = section->mr;
1074 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
1075 if (!cpu->can_do_io) {
1076 cpu_io_recompile(cpu, retaddr);
1077 }
1078 cpu->mem_io_pc = retaddr;
1079
1080 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
1081 qemu_mutex_lock_iothread();
1082 locked = true;
1083 }
1084 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
1085 if (r != MEMTX_OK) {
1086 hwaddr physaddr = mr_offset +
1087 section->offset_within_address_space -
1088 section->offset_within_region;
1089
1090 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
1091 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
1092 retaddr);
1093 }
1094 if (locked) {
1095 qemu_mutex_unlock_iothread();
1096 }
1097 }
1098
1099 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
1100 {
1101 #if TCG_OVERSIZED_GUEST
1102 return *(target_ulong *)((uintptr_t)entry + ofs);
1103 #else
1104 /* ofs might correspond to .addr_write, so use atomic_read */
1105 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1106 #endif
1107 }
1108
1109 /* Return true if ADDR is present in the victim tlb, and has been copied
1110 back to the main tlb. */
1111 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1112 size_t elt_ofs, target_ulong page)
1113 {
1114 size_t vidx;
1115
1116 assert_cpu_is_self(env_cpu(env));
1117 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1118 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1119 target_ulong cmp;
1120
1121 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1122 #if TCG_OVERSIZED_GUEST
1123 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1124 #else
1125 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1126 #endif
1127
1128 if (cmp == page) {
1129 /* Found entry in victim tlb, swap tlb and iotlb. */
1130 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1131
1132 qemu_spin_lock(&env_tlb(env)->c.lock);
1133 copy_tlb_helper_locked(&tmptlb, tlb);
1134 copy_tlb_helper_locked(tlb, vtlb);
1135 copy_tlb_helper_locked(vtlb, &tmptlb);
1136 qemu_spin_unlock(&env_tlb(env)->c.lock);
1137
1138 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1139 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1140 tmpio = *io; *io = *vio; *vio = tmpio;
1141 return true;
1142 }
1143 }
1144 return false;
1145 }
1146
/*
 * Shorthand for victim_tlb_hit() that picks up env, mmu_idx and index
 * from the expansion site.  TY names the CPUTLBEntry comparator field
 * to match against; ADDR is reduced to its page address.
 */
#define VICTIM_TLB_HIT(TY, ADDR)                                \
    victim_tlb_hit(env, mmu_idx, index,                         \
                   offsetof(CPUTLBEntry, TY),                   \
                   (ADDR) & TARGET_PAGE_MASK)
1151
1152 /*
1153 * Return a ram_addr_t for the virtual address for execution.
1154 *
1155 * Return -1 if we can't translate and execute from an entire page
1156 * of RAM. This will force us to execute by loading and translating
1157 * one insn at a time, without caching.
1158 *
1159 * NOTE: This function will trigger an exception if the page is
1160 * not executable.
1161 */
1162 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1163 void **hostp)
1164 {
1165 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1166 uintptr_t index = tlb_index(env, mmu_idx, addr);
1167 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1168 void *p;
1169
1170 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1171 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1172 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1173 index = tlb_index(env, mmu_idx, addr);
1174 entry = tlb_entry(env, mmu_idx, addr);
1175
1176 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1177 /*
1178 * The MMU protection covers a smaller range than a target
1179 * page, so we must redo the MMU check for every insn.
1180 */
1181 return -1;
1182 }
1183 }
1184 assert(tlb_hit(entry->addr_code, addr));
1185 }
1186
1187 if (unlikely(entry->addr_code & TLB_MMIO)) {
1188 /* The region is not backed by RAM. */
1189 if (hostp) {
1190 *hostp = NULL;
1191 }
1192 return -1;
1193 }
1194
1195 p = (void *)((uintptr_t)addr + entry->addend);
1196 if (hostp) {
1197 *hostp = p;
1198 }
1199 return qemu_ram_addr_from_host_nofail(p);
1200 }
1201
1202 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1203 {
1204 return get_page_addr_code_hostp(env, addr, NULL);
1205 }
1206
1207 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1208 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1209 {
1210 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1211
1212 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1213
1214 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1215 struct page_collection *pages
1216 = page_collection_lock(ram_addr, ram_addr + size);
1217 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1218 page_collection_unlock(pages);
1219 }
1220
1221 /*
1222 * Set both VGA and migration bits for simplicity and to remove
1223 * the notdirty callback faster.
1224 */
1225 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1226
1227 /* We remove the notdirty callback only if the code has been flushed. */
1228 if (!cpu_physical_memory_is_clean(ram_addr)) {
1229 trace_memory_notdirty_set_dirty(mem_vaddr);
1230 tlb_set_dirty(cpu, mem_vaddr);
1231 }
1232 }
1233
1234 /*
1235 * Probe for whether the specified guest access is permitted. If it is not
1236 * permitted then an exception will be taken in the same way as if this
1237 * were a real access (and we will not return).
1238 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
1239 * returns the address of the host page similar to tlb_vaddr_to_host().
1240 */
1241 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1242 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1243 {
1244 uintptr_t index = tlb_index(env, mmu_idx, addr);
1245 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1246 target_ulong tlb_addr;
1247 size_t elt_ofs;
1248 int wp_access;
1249
1250 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1251
1252 switch (access_type) {
1253 case MMU_DATA_LOAD:
1254 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1255 wp_access = BP_MEM_READ;
1256 break;
1257 case MMU_DATA_STORE:
1258 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1259 wp_access = BP_MEM_WRITE;
1260 break;
1261 case MMU_INST_FETCH:
1262 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1263 wp_access = BP_MEM_READ;
1264 break;
1265 default:
1266 g_assert_not_reached();
1267 }
1268 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1269
1270 if (unlikely(!tlb_hit(tlb_addr, addr))) {
1271 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1272 addr & TARGET_PAGE_MASK)) {
1273 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1274 /* TLB resize via tlb_fill may have moved the entry. */
1275 index = tlb_index(env, mmu_idx, addr);
1276 entry = tlb_entry(env, mmu_idx, addr);
1277 }
1278 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1279 }
1280
1281 if (!size) {
1282 return NULL;
1283 }
1284
1285 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1286 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1287
1288 /* Reject I/O access, or other required slow-path. */
1289 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1290 return NULL;
1291 }
1292
1293 /* Handle watchpoints. */
1294 if (tlb_addr & TLB_WATCHPOINT) {
1295 cpu_check_watchpoint(env_cpu(env), addr, size,
1296 iotlbentry->attrs, wp_access, retaddr);
1297 }
1298
1299 /* Handle clean RAM pages. */
1300 if (tlb_addr & TLB_NOTDIRTY) {
1301 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1302 }
1303 }
1304
1305 return (void *)((uintptr_t)addr + entry->addend);
1306 }
1307
1308 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1309 MMUAccessType access_type, int mmu_idx)
1310 {
1311 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1312 target_ulong tlb_addr, page;
1313 size_t elt_ofs;
1314
1315 switch (access_type) {
1316 case MMU_DATA_LOAD:
1317 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1318 break;
1319 case MMU_DATA_STORE:
1320 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1321 break;
1322 case MMU_INST_FETCH:
1323 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1324 break;
1325 default:
1326 g_assert_not_reached();
1327 }
1328
1329 page = addr & TARGET_PAGE_MASK;
1330 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1331
1332 if (!tlb_hit_page(tlb_addr, page)) {
1333 uintptr_t index = tlb_index(env, mmu_idx, addr);
1334
1335 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1336 CPUState *cs = env_cpu(env);
1337 CPUClass *cc = CPU_GET_CLASS(cs);
1338
1339 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1340 /* Non-faulting page table read failed. */
1341 return NULL;
1342 }
1343
1344 /* TLB resize via tlb_fill may have moved the entry. */
1345 entry = tlb_entry(env, mmu_idx, addr);
1346 }
1347 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1348 }
1349
1350 if (tlb_addr & ~TARGET_PAGE_MASK) {
1351 /* IO access */
1352 return NULL;
1353 }
1354
1355 return (void *)((uintptr_t)addr + entry->addend);
1356 }
1357
1358
#ifdef CONFIG_PLUGIN
/*
 * Look @addr up in the main TLB and describe the result in @data.
 *
 * Only the main TLB is consulted: there is no refill and no victim-TLB
 * search, as this is purely informational.  The instrumented memory
 * access should have just filled the TLB, so a miss is not expected.
 *
 * Returns true and fills @data on a hit, false on a miss (in which case
 * @data is left untouched).
 */
bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
                       bool is_store, struct qemu_plugin_hwaddr *data)
{
    CPUArchState *env = cpu->env_ptr;
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    uintptr_t index = tlb_index(env, mmu_idx, addr);
    target_ulong tlb_addr;

    if (is_store) {
        tlb_addr = tlb_addr_write(tlbe);
    } else {
        tlb_addr = tlbe->addr_read;
    }

    if (unlikely(!tlb_hit(tlb_addr, addr))) {
        return false;
    }

    if (tlb_addr & TLB_MMIO) {
        /* We must have an iotlb entry for MMIO */
        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

        data->is_io = true;
        data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr,
                                              iotlbentry->attrs);
        data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
    } else {
        data->is_io = false;
        data->v.ram.hostaddr = addr + tlbe->addend;
    }
    return true;
}

#endif
1396
1397 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1398 * operations, or io operations to proceed. Return the host address. */
1399 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1400 TCGMemOpIdx oi, uintptr_t retaddr)
1401 {
1402 size_t mmu_idx = get_mmuidx(oi);
1403 uintptr_t index = tlb_index(env, mmu_idx, addr);
1404 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1405 target_ulong tlb_addr = tlb_addr_write(tlbe);
1406 MemOp mop = get_memop(oi);
1407 int a_bits = get_alignment_bits(mop);
1408 int s_bits = mop & MO_SIZE;
1409 void *hostaddr;
1410
1411 /* Adjust the given return address. */
1412 retaddr -= GETPC_ADJ;
1413
1414 /* Enforce guest required alignment. */
1415 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1416 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1417 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1418 mmu_idx, retaddr);
1419 }
1420
1421 /* Enforce qemu required alignment. */
1422 if (unlikely(addr & ((1 << s_bits) - 1))) {
1423 /* We get here if guest alignment was not requested,
1424 or was not enforced by cpu_unaligned_access above.
1425 We might widen the access and emulate, but for now
1426 mark an exception and exit the cpu loop. */
1427 goto stop_the_world;
1428 }
1429
1430 /* Check TLB entry and enforce page permissions. */
1431 if (!tlb_hit(tlb_addr, addr)) {
1432 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1433 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1434 mmu_idx, retaddr);
1435 index = tlb_index(env, mmu_idx, addr);
1436 tlbe = tlb_entry(env, mmu_idx, addr);
1437 }
1438 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1439 }
1440
1441 /* Notice an IO access or a needs-MMU-lookup access */
1442 if (unlikely(tlb_addr & TLB_MMIO)) {
1443 /* There's really nothing that can be done to
1444 support this apart from stop-the-world. */
1445 goto stop_the_world;
1446 }
1447
1448 /* Let the guest notice RMW on a write-only page. */
1449 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1450 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1451 mmu_idx, retaddr);
1452 /* Since we don't support reads and writes to different addresses,
1453 and we do have the proper page loaded for write, this shouldn't
1454 ever return. But just in case, handle via stop-the-world. */
1455 goto stop_the_world;
1456 }
1457
1458 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1459
1460 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1461 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1462 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1463 }
1464
1465 return hostaddr;
1466
1467 stop_the_world:
1468 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1469 }
1470
1471 /*
1472 * Load Helpers
1473 *
1474 * We support two different access types. SOFTMMU_CODE_ACCESS is
1475 * specifically for reading instructions from system memory. It is
1476 * called by the translation loop and in some helpers where the code
1477 * is disassembled. It shouldn't be called directly by guest code.
1478 */
1479
1480 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1481 TCGMemOpIdx oi, uintptr_t retaddr);
1482
1483 static inline uint64_t QEMU_ALWAYS_INLINE
1484 load_memop(const void *haddr, MemOp op)
1485 {
1486 switch (op) {
1487 case MO_UB:
1488 return ldub_p(haddr);
1489 case MO_BEUW:
1490 return lduw_be_p(haddr);
1491 case MO_LEUW:
1492 return lduw_le_p(haddr);
1493 case MO_BEUL:
1494 return (uint32_t)ldl_be_p(haddr);
1495 case MO_LEUL:
1496 return (uint32_t)ldl_le_p(haddr);
1497 case MO_BEQ:
1498 return ldq_be_p(haddr);
1499 case MO_LEQ:
1500 return ldq_le_p(haddr);
1501 default:
1502 qemu_build_not_reached();
1503 }
1504 }
1505
1506 static inline uint64_t QEMU_ALWAYS_INLINE
1507 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1508 uintptr_t retaddr, MemOp op, bool code_read,
1509 FullLoadHelper *full_load)
1510 {
1511 uintptr_t mmu_idx = get_mmuidx(oi);
1512 uintptr_t index = tlb_index(env, mmu_idx, addr);
1513 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1514 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1515 const size_t tlb_off = code_read ?
1516 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1517 const MMUAccessType access_type =
1518 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1519 unsigned a_bits = get_alignment_bits(get_memop(oi));
1520 void *haddr;
1521 uint64_t res;
1522 size_t size = memop_size(op);
1523
1524 /* Handle CPU specific unaligned behaviour */
1525 if (addr & ((1 << a_bits) - 1)) {
1526 cpu_unaligned_access(env_cpu(env), addr, access_type,
1527 mmu_idx, retaddr);
1528 }
1529
1530 /* If the TLB entry is for a different page, reload and try again. */
1531 if (!tlb_hit(tlb_addr, addr)) {
1532 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1533 addr & TARGET_PAGE_MASK)) {
1534 tlb_fill(env_cpu(env), addr, size,
1535 access_type, mmu_idx, retaddr);
1536 index = tlb_index(env, mmu_idx, addr);
1537 entry = tlb_entry(env, mmu_idx, addr);
1538 }
1539 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1540 tlb_addr &= ~TLB_INVALID_MASK;
1541 }
1542
1543 /* Handle anything that isn't just a straight memory access. */
1544 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1545 CPUIOTLBEntry *iotlbentry;
1546 bool need_swap;
1547
1548 /* For anything that is unaligned, recurse through full_load. */
1549 if ((addr & (size - 1)) != 0) {
1550 goto do_unaligned_access;
1551 }
1552
1553 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1554
1555 /* Handle watchpoints. */
1556 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1557 /* On watchpoint hit, this will longjmp out. */
1558 cpu_check_watchpoint(env_cpu(env), addr, size,
1559 iotlbentry->attrs, BP_MEM_READ, retaddr);
1560 }
1561
1562 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1563
1564 /* Handle I/O access. */
1565 if (likely(tlb_addr & TLB_MMIO)) {
1566 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1567 access_type, op ^ (need_swap * MO_BSWAP));
1568 }
1569
1570 haddr = (void *)((uintptr_t)addr + entry->addend);
1571
1572 /*
1573 * Keep these two load_memop separate to ensure that the compiler
1574 * is able to fold the entire function to a single instruction.
1575 * There is a build-time assert inside to remind you of this. ;-)
1576 */
1577 if (unlikely(need_swap)) {
1578 return load_memop(haddr, op ^ MO_BSWAP);
1579 }
1580 return load_memop(haddr, op);
1581 }
1582
1583 /* Handle slow unaligned access (it spans two pages or IO). */
1584 if (size > 1
1585 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1586 >= TARGET_PAGE_SIZE)) {
1587 target_ulong addr1, addr2;
1588 uint64_t r1, r2;
1589 unsigned shift;
1590 do_unaligned_access:
1591 addr1 = addr & ~((target_ulong)size - 1);
1592 addr2 = addr1 + size;
1593 r1 = full_load(env, addr1, oi, retaddr);
1594 r2 = full_load(env, addr2, oi, retaddr);
1595 shift = (addr & (size - 1)) * 8;
1596
1597 if (memop_big_endian(op)) {
1598 /* Big-endian combine. */
1599 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1600 } else {
1601 /* Little-endian combine. */
1602 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1603 }
1604 return res & MAKE_64BIT_MASK(0, size * 8);
1605 }
1606
1607 haddr = (void *)((uintptr_t)addr + entry->addend);
1608 return load_memop(haddr, op);
1609 }
1610
1611 /*
1612 * For the benefit of TCG generated code, we want to avoid the
1613 * complication of ABI-specific return type promotion and always
1614 * return a value extended to the register size of the host. This is
1615 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1616 * data, and for that we always have uint64_t.
1617 *
1618 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1619 */
1620
1621 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1622 TCGMemOpIdx oi, uintptr_t retaddr)
1623 {
1624 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1625 }
1626
1627 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1628 TCGMemOpIdx oi, uintptr_t retaddr)
1629 {
1630 return full_ldub_mmu(env, addr, oi, retaddr);
1631 }
1632
1633 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1634 TCGMemOpIdx oi, uintptr_t retaddr)
1635 {
1636 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1637 full_le_lduw_mmu);
1638 }
1639
1640 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1641 TCGMemOpIdx oi, uintptr_t retaddr)
1642 {
1643 return full_le_lduw_mmu(env, addr, oi, retaddr);
1644 }
1645
1646 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1647 TCGMemOpIdx oi, uintptr_t retaddr)
1648 {
1649 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1650 full_be_lduw_mmu);
1651 }
1652
1653 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1654 TCGMemOpIdx oi, uintptr_t retaddr)
1655 {
1656 return full_be_lduw_mmu(env, addr, oi, retaddr);
1657 }
1658
1659 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1660 TCGMemOpIdx oi, uintptr_t retaddr)
1661 {
1662 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1663 full_le_ldul_mmu);
1664 }
1665
1666 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1667 TCGMemOpIdx oi, uintptr_t retaddr)
1668 {
1669 return full_le_ldul_mmu(env, addr, oi, retaddr);
1670 }
1671
1672 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1673 TCGMemOpIdx oi, uintptr_t retaddr)
1674 {
1675 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1676 full_be_ldul_mmu);
1677 }
1678
1679 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1680 TCGMemOpIdx oi, uintptr_t retaddr)
1681 {
1682 return full_be_ldul_mmu(env, addr, oi, retaddr);
1683 }
1684
1685 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1686 TCGMemOpIdx oi, uintptr_t retaddr)
1687 {
1688 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1689 helper_le_ldq_mmu);
1690 }
1691
1692 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1693 TCGMemOpIdx oi, uintptr_t retaddr)
1694 {
1695 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1696 helper_be_ldq_mmu);
1697 }
1698
1699 /*
1700 * Provide signed versions of the load routines as well. We can of course
1701 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1702 */
1703
1704
1705 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1706 TCGMemOpIdx oi, uintptr_t retaddr)
1707 {
1708 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1709 }
1710
1711 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1712 TCGMemOpIdx oi, uintptr_t retaddr)
1713 {
1714 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1715 }
1716
1717 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1718 TCGMemOpIdx oi, uintptr_t retaddr)
1719 {
1720 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1721 }
1722
1723 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1724 TCGMemOpIdx oi, uintptr_t retaddr)
1725 {
1726 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1727 }
1728
1729 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1730 TCGMemOpIdx oi, uintptr_t retaddr)
1731 {
1732 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1733 }
1734
1735 /*
1736 * Load helpers for cpu_ldst.h.
1737 */
1738
1739 static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
1740 int mmu_idx, uintptr_t retaddr,
1741 MemOp op, FullLoadHelper *full_load)
1742 {
1743 uint16_t meminfo;
1744 TCGMemOpIdx oi;
1745 uint64_t ret;
1746
1747 meminfo = trace_mem_get_info(op, mmu_idx, false);
1748 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
1749
1750 op &= ~MO_SIGN;
1751 oi = make_memop_idx(op, mmu_idx);
1752 ret = full_load(env, addr, oi, retaddr);
1753
1754 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
1755
1756 return ret;
1757 }
1758
1759 uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1760 int mmu_idx, uintptr_t ra)
1761 {
1762 return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu);
1763 }
1764
1765 int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1766 int mmu_idx, uintptr_t ra)
1767 {
1768 return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB,
1769 full_ldub_mmu);
1770 }
1771
1772 uint32_t cpu_lduw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1773 int mmu_idx, uintptr_t ra)
1774 {
1775 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUW,
1776 MO_TE == MO_LE
1777 ? full_le_lduw_mmu : full_be_lduw_mmu);
1778 }
1779
1780 int cpu_ldsw_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1781 int mmu_idx, uintptr_t ra)
1782 {
1783 return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_TESW,
1784 MO_TE == MO_LE
1785 ? full_le_lduw_mmu : full_be_lduw_mmu);
1786 }
1787
1788 uint32_t cpu_ldl_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1789 int mmu_idx, uintptr_t ra)
1790 {
1791 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEUL,
1792 MO_TE == MO_LE
1793 ? full_le_ldul_mmu : full_be_ldul_mmu);
1794 }
1795
1796 uint64_t cpu_ldq_mmuidx_ra(CPUArchState *env, abi_ptr addr,
1797 int mmu_idx, uintptr_t ra)
1798 {
1799 return cpu_load_helper(env, addr, mmu_idx, ra, MO_TEQ,
1800 MO_TE == MO_LE
1801 ? helper_le_ldq_mmu : helper_be_ldq_mmu);
1802 }
1803
1804 uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr,
1805 uintptr_t retaddr)
1806 {
1807 return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1808 }
1809
1810 int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1811 {
1812 return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1813 }
1814
1815 uint32_t cpu_lduw_data_ra(CPUArchState *env, target_ulong ptr,
1816 uintptr_t retaddr)
1817 {
1818 return cpu_lduw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1819 }
1820
1821 int cpu_ldsw_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1822 {
1823 return cpu_ldsw_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1824 }
1825
1826 uint32_t cpu_ldl_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1827 {
1828 return cpu_ldl_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1829 }
1830
1831 uint64_t cpu_ldq_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr)
1832 {
1833 return cpu_ldq_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr);
1834 }
1835
1836 uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr)
1837 {
1838 return cpu_ldub_data_ra(env, ptr, 0);
1839 }
1840
1841 int cpu_ldsb_data(CPUArchState *env, target_ulong ptr)
1842 {
1843 return cpu_ldsb_data_ra(env, ptr, 0);
1844 }
1845
1846 uint32_t cpu_lduw_data(CPUArchState *env, target_ulong ptr)
1847 {
1848 return cpu_lduw_data_ra(env, ptr, 0);
1849 }
1850
1851 int cpu_ldsw_data(CPUArchState *env, target_ulong ptr)
1852 {
1853 return cpu_ldsw_data_ra(env, ptr, 0);
1854 }
1855
1856 uint32_t cpu_ldl_data(CPUArchState *env, target_ulong ptr)
1857 {
1858 return cpu_ldl_data_ra(env, ptr, 0);
1859 }
1860
1861 uint64_t cpu_ldq_data(CPUArchState *env, target_ulong ptr)
1862 {
1863 return cpu_ldq_data_ra(env, ptr, 0);
1864 }
1865
1866 /*
1867 * Store Helpers
1868 */
1869
1870 static inline void QEMU_ALWAYS_INLINE
1871 store_memop(void *haddr, uint64_t val, MemOp op)
1872 {
1873 switch (op) {
1874 case MO_UB:
1875 stb_p(haddr, val);
1876 break;
1877 case MO_BEUW:
1878 stw_be_p(haddr, val);
1879 break;
1880 case MO_LEUW:
1881 stw_le_p(haddr, val);
1882 break;
1883 case MO_BEUL:
1884 stl_be_p(haddr, val);
1885 break;
1886 case MO_LEUL:
1887 stl_le_p(haddr, val);
1888 break;
1889 case MO_BEQ:
1890 stq_be_p(haddr, val);
1891 break;
1892 case MO_LEQ:
1893 stq_le_p(haddr, val);
1894 break;
1895 default:
1896 qemu_build_not_reached();
1897 }
1898 }
1899
1900 static inline void QEMU_ALWAYS_INLINE
1901 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1902 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1903 {
1904 uintptr_t mmu_idx = get_mmuidx(oi);
1905 uintptr_t index = tlb_index(env, mmu_idx, addr);
1906 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1907 target_ulong tlb_addr = tlb_addr_write(entry);
1908 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1909 unsigned a_bits = get_alignment_bits(get_memop(oi));
1910 void *haddr;
1911 size_t size = memop_size(op);
1912
1913 /* Handle CPU specific unaligned behaviour */
1914 if (addr & ((1 << a_bits) - 1)) {
1915 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1916 mmu_idx, retaddr);
1917 }
1918
1919 /* If the TLB entry is for a different page, reload and try again. */
1920 if (!tlb_hit(tlb_addr, addr)) {
1921 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1922 addr & TARGET_PAGE_MASK)) {
1923 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1924 mmu_idx, retaddr);
1925 index = tlb_index(env, mmu_idx, addr);
1926 entry = tlb_entry(env, mmu_idx, addr);
1927 }
1928 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1929 }
1930
1931 /* Handle anything that isn't just a straight memory access. */
1932 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1933 CPUIOTLBEntry *iotlbentry;
1934 bool need_swap;
1935
1936 /* For anything that is unaligned, recurse through byte stores. */
1937 if ((addr & (size - 1)) != 0) {
1938 goto do_unaligned_access;
1939 }
1940
1941 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1942
1943 /* Handle watchpoints. */
1944 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1945 /* On watchpoint hit, this will longjmp out. */
1946 cpu_check_watchpoint(env_cpu(env), addr, size,
1947 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1948 }
1949
1950 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1951
1952 /* Handle I/O access. */
1953 if (tlb_addr & TLB_MMIO) {
1954 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1955 op ^ (need_swap * MO_BSWAP));
1956 return;
1957 }
1958
1959 /* Ignore writes to ROM. */
1960 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1961 return;
1962 }
1963
1964 /* Handle clean RAM pages. */
1965 if (tlb_addr & TLB_NOTDIRTY) {
1966 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1967 }
1968
1969 haddr = (void *)((uintptr_t)addr + entry->addend);
1970
1971 /*
1972 * Keep these two store_memop separate to ensure that the compiler
1973 * is able to fold the entire function to a single instruction.
1974 * There is a build-time assert inside to remind you of this. ;-)
1975 */
1976 if (unlikely(need_swap)) {
1977 store_memop(haddr, val, op ^ MO_BSWAP);
1978 } else {
1979 store_memop(haddr, val, op);
1980 }
1981 return;
1982 }
1983
1984 /* Handle slow unaligned access (it spans two pages or IO). */
1985 if (size > 1
1986 && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
1987 >= TARGET_PAGE_SIZE)) {
1988 int i;
1989 uintptr_t index2;
1990 CPUTLBEntry *entry2;
1991 target_ulong page2, tlb_addr2;
1992 size_t size2;
1993
1994 do_unaligned_access:
1995 /*
1996 * Ensure the second page is in the TLB. Note that the first page
1997 * is already guaranteed to be filled, and that the second page
1998 * cannot evict the first.
1999 */
2000 page2 = (addr + size) & TARGET_PAGE_MASK;
2001 size2 = (addr + size) & ~TARGET_PAGE_MASK;
2002 index2 = tlb_index(env, mmu_idx, page2);
2003 entry2 = tlb_entry(env, mmu_idx, page2);
2004 tlb_addr2 = tlb_addr_write(entry2);
2005 if (!tlb_hit_page(tlb_addr2, page2)) {
2006 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
2007 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
2008 mmu_idx, retaddr);
2009 index2 = tlb_index(env, mmu_idx, page2);
2010 entry2 = tlb_entry(env, mmu_idx, page2);
2011 }
2012 tlb_addr2 = tlb_addr_write(entry2);
2013 }
2014
2015 /*
2016 * Handle watchpoints. Since this may trap, all checks
2017 * must happen before any store.
2018 */
2019 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
2020 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
2021 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
2022 BP_MEM_WRITE, retaddr);
2023 }
2024 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
2025 cpu_check_watchpoint(env_cpu(env), page2, size2,
2026 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
2027 BP_MEM_WRITE, retaddr);
2028 }
2029
2030 /*
2031 * XXX: not efficient, but simple.
2032 * This loop must go in the forward direction to avoid issues
2033 * with self-modifying code in Windows 64-bit.
2034 */
2035 for (i = 0; i < size; ++i) {
2036 uint8_t val8;
2037 if (memop_big_endian(op)) {
2038 /* Big-endian extract. */
2039 val8 = val >> (((size - 1) * 8) - (i * 8));
2040 } else {
2041 /* Little-endian extract. */
2042 val8 = val >> (i * 8);
2043 }
2044 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
2045 }
2046 return;
2047 }
2048
2049 haddr = (void *)((uintptr_t)addr + entry->addend);
2050 store_memop(haddr, val, op);
2051 }
2052
2053 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
2054 TCGMemOpIdx oi, uintptr_t retaddr)
2055 {
2056 store_helper(env, addr, val, oi, retaddr, MO_UB);
2057 }
2058
2059 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2060 TCGMemOpIdx oi, uintptr_t retaddr)
2061 {
2062 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
2063 }
2064
2065 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
2066 TCGMemOpIdx oi, uintptr_t retaddr)
2067 {
2068 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
2069 }
2070
2071 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2072 TCGMemOpIdx oi, uintptr_t retaddr)
2073 {
2074 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
2075 }
2076
2077 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
2078 TCGMemOpIdx oi, uintptr_t retaddr)
2079 {
2080 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
2081 }
2082
2083 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2084 TCGMemOpIdx oi, uintptr_t retaddr)
2085 {
2086 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
2087 }
2088
2089 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
2090 TCGMemOpIdx oi, uintptr_t retaddr)
2091 {
2092 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
2093 }
2094
2095 /*
2096 * Store Helpers for cpu_ldst.h
2097 */
2098
2099 static inline void QEMU_ALWAYS_INLINE
2100 cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
2101 int mmu_idx, uintptr_t retaddr, MemOp op)
2102 {
2103 TCGMemOpIdx oi;
2104 uint16_t meminfo;
2105
2106 meminfo = trace_mem_get_info(op, mmu_idx, true);
2107 trace_guest_mem_before_exec(env_cpu(env), addr, meminfo);
2108
2109 oi = make_memop_idx(op, mmu_idx);
2110 store_helper(env, addr, val, oi, retaddr, op);
2111
2112 qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo);
2113 }
2114
2115 void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2116 int mmu_idx, uintptr_t retaddr)
2117 {
2118 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB);
2119 }
2120
2121 void cpu_stw_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2122 int mmu_idx, uintptr_t retaddr)
2123 {
2124 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUW);
2125 }
2126
2127 void cpu_stl_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val,
2128 int mmu_idx, uintptr_t retaddr)
2129 {
2130 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEUL);
2131 }
2132
2133 void cpu_stq_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val,
2134 int mmu_idx, uintptr_t retaddr)
2135 {
2136 cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_TEQ);
2137 }
2138
2139 void cpu_stb_data_ra(CPUArchState *env, target_ulong ptr,
2140 uint32_t val, uintptr_t retaddr)
2141 {
2142 cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2143 }
2144
2145 void cpu_stw_data_ra(CPUArchState *env, target_ulong ptr,
2146 uint32_t val, uintptr_t retaddr)
2147 {
2148 cpu_stw_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2149 }
2150
2151 void cpu_stl_data_ra(CPUArchState *env, target_ulong ptr,
2152 uint32_t val, uintptr_t retaddr)
2153 {
2154 cpu_stl_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2155 }
2156
2157 void cpu_stq_data_ra(CPUArchState *env, target_ulong ptr,
2158 uint64_t val, uintptr_t retaddr)
2159 {
2160 cpu_stq_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr);
2161 }
2162
2163 void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2164 {
2165 cpu_stb_data_ra(env, ptr, val, 0);
2166 }
2167
2168 void cpu_stw_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2169 {
2170 cpu_stw_data_ra(env, ptr, val, 0);
2171 }
2172
2173 void cpu_stl_data(CPUArchState *env, target_ulong ptr, uint32_t val)
2174 {
2175 cpu_stl_data_ra(env, ptr, val, 0);
2176 }
2177
2178 void cpu_stq_data(CPUArchState *env, target_ulong ptr, uint64_t val)
2179 {
2180 cpu_stq_data_ra(env, ptr, val, 0);
2181 }
2182
2183 /*
2184  * First set of helpers allows passing in of OI and RETADDR.  This makes
 * them callable from other helpers.
 *
 * Template parameters configured for this set:
 *   EXTRA_ARGS         - the extra parameters ", TCGMemOpIdx oi,
 *                        uintptr_t retaddr"
 *   ATOMIC_NAME(X)     - HELPER() applied to atomic_<X>, SUFFIX, END and
 *                        _mmu glued together.  SUFFIX and END are not
 *                        defined here; presumably atomic_template.h
 *                        supplies them (TODO confirm).
 *   ATOMIC_MMU_DECLS   - defined empty
 *   ATOMIC_MMU_LOOKUP  - atomic_mmu_lookup() called with the retaddr
 *                        parameter
 *   ATOMIC_MMU_CLEANUP - defined empty
 *   ATOMIC_MMU_IDX     - get_mmuidx(oi)
 */
2185
2186 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
2187 #define ATOMIC_NAME(X) \
2188 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
2189 #define ATOMIC_MMU_DECLS
2190 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
2191 #define ATOMIC_MMU_CLEANUP
2192 #define ATOMIC_MMU_IDX get_mmuidx(oi)
2193
2194 #include "atomic_common.inc.c"
2195
/*
 * Include the template once per access size.  DATA_SIZE is redefined before
 * every include and never #undef'd in this file, so atomic_template.h
 * presumably undefines it itself (TODO confirm).
 */
2196 #define DATA_SIZE 1
2197 #include "atomic_template.h"
2198
2199 #define DATA_SIZE 2
2200 #include "atomic_template.h"
2201
2202 #define DATA_SIZE 4
2203 #include "atomic_template.h"
2204
/* The 8-byte instantiation is only included when CONFIG_ATOMIC64 is defined. */
2205 #ifdef CONFIG_ATOMIC64
2206 #define DATA_SIZE 8
2207 #include "atomic_template.h"
2208 #endif
2209
/* The 16-byte instantiation needs HAVE_CMPXCHG128 or HAVE_ATOMIC128. */
2210 #if HAVE_CMPXCHG128 || HAVE_ATOMIC128
2211 #define DATA_SIZE 16
2212 #include "atomic_template.h"
2213 #endif
2214
2215 /*
 * Second set of helpers are directly callable from TCG as helpers.
 *
 * EXTRA_ARGS, ATOMIC_NAME and ATOMIC_MMU_LOOKUP are redefined for this set:
 * the helpers take only the extra "TCGMemOpIdx oi" parameter, their names
 * have no _mmu suffix, and the lookup passes GETPC() as the return address.
 * The other ATOMIC_MMU_* macros keep their earlier definitions.
 */
2216
2217 #undef EXTRA_ARGS
2218 #undef ATOMIC_NAME
2219 #undef ATOMIC_MMU_LOOKUP
2220 #define EXTRA_ARGS , TCGMemOpIdx oi
2221 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
2222 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
2223
/*
 * Include the template for sizes 1, 2 and 4, plus 8 when CONFIG_ATOMIC64 is
 * defined.  There is no 16-byte instantiation in this set.
 */
2224 #define DATA_SIZE 1
2225 #include "atomic_template.h"
2226
2227 #define DATA_SIZE 2
2228 #include "atomic_template.h"
2229
2230 #define DATA_SIZE 4
2231 #include "atomic_template.h"
2232
2233 #ifdef CONFIG_ATOMIC64
2234 #define DATA_SIZE 8
2235 #include "atomic_template.h"
2236 #endif
/* ATOMIC_MMU_IDX is not needed past this point. */
2237 #undef ATOMIC_MMU_IDX
2238
2239 /* Code access functions. */
2240
2241 static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
2242 TCGMemOpIdx oi, uintptr_t retaddr)
2243 {
2244 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
2245 }
2246
2247 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
2248 {
2249 TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
2250 return full_ldub_code(env, addr, oi, 0);
2251 }
2252
2253 static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
2254 TCGMemOpIdx oi, uintptr_t retaddr)
2255 {
2256 return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
2257 }
2258
2259 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
2260 {
2261 TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
2262 return full_lduw_code(env, addr, oi, 0);
2263 }
2264
2265 static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
2266 TCGMemOpIdx oi, uintptr_t retaddr)
2267 {
2268 return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
2269 }
2270
2271 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
2272 {
2273 TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
2274 return full_ldl_code(env, addr, oi, 0);
2275 }
2276
2277 static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
2278 TCGMemOpIdx oi, uintptr_t retaddr)
2279 {
2280 return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code);
2281 }
2282
2283 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
2284 {
2285 TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true));
2286 return full_ldq_code(env, addr, oi, 0);
2287 }