exec: introduce qemu_ram_unset_idstr() to unset RAMBlock idstr
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
86 typedef struct PhysPageEntry PhysPageEntry;
87
88 struct PhysPageEntry {
89 /* How many levels to skip down to the next node; each level covers P_L2_BITS bits. 0 for a leaf. */
90 uint32_t skip : 6;
91 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
92 uint32_t ptr : 26;
93 };
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
110 unsigned nodes_nb;
111 unsigned nodes_nb_alloc;
112 Node *nodes;
113 MemoryRegionSection *sections;
114 } PhysPageMap;
115
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
119 */
120 PhysPageEntry phys_map;
121 PhysPageMap map;
122 AddressSpace *as;
123 };
124
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
127 MemoryRegion iomem;
128 AddressSpace *as;
129 hwaddr base;
130 uint16_t sub_section[TARGET_PAGE_SIZE];
131 } subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174 {
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204 }
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
216 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
218 */
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 {
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267 }
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu, 1);
424
425 return 0;
426 }
427
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
430 .version_id = 1,
431 .minimum_version_id = 1,
432 .post_load = cpu_common_post_load,
433 .fields = (VMStateField[]) {
434 VMSTATE_UINT32(halted, CPUState),
435 VMSTATE_UINT32(interrupt_request, CPUState),
436 VMSTATE_END_OF_LIST()
437 }
438 };
439
440 #endif
441
442 CPUState *qemu_get_cpu(int index)
443 {
444 CPUState *cpu;
445
446 CPU_FOREACH(cpu) {
447 if (cpu->cpu_index == index) {
448 return cpu;
449 }
450 }
451
452 return NULL;
453 }
454
455 #if !defined(CONFIG_USER_ONLY)
456 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
457 {
458 /* We only support one address space per cpu at the moment. */
459 assert(cpu->as == as);
460
461 if (cpu->tcg_as_listener) {
462 memory_listener_unregister(cpu->tcg_as_listener);
463 } else {
464 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
465 }
466 cpu->tcg_as_listener->commit = tcg_commit;
467 memory_listener_register(cpu->tcg_as_listener, as);
468 }
469 #endif
470
471 void cpu_exec_init(CPUArchState *env)
472 {
473 CPUState *cpu = ENV_GET_CPU(env);
474 CPUClass *cc = CPU_GET_CLASS(cpu);
475 CPUState *some_cpu;
476 int cpu_index;
477
478 #if defined(CONFIG_USER_ONLY)
479 cpu_list_lock();
480 #endif
481 cpu_index = 0;
482 CPU_FOREACH(some_cpu) {
483 cpu_index++;
484 }
485 cpu->cpu_index = cpu_index;
486 cpu->numa_node = 0;
487 QTAILQ_INIT(&cpu->breakpoints);
488 QTAILQ_INIT(&cpu->watchpoints);
489 #ifndef CONFIG_USER_ONLY
490 cpu->as = &address_space_memory;
491 cpu->thread_id = qemu_get_thread_id();
492 #endif
493 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
494 #if defined(CONFIG_USER_ONLY)
495 cpu_list_unlock();
496 #endif
497 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
498 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
499 }
500 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
501 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
502 cpu_save, cpu_load, env);
503 assert(cc->vmsd == NULL);
504 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
505 #endif
506 if (cc->vmsd != NULL) {
507 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
508 }
509 }
510
511 #if defined(TARGET_HAS_ICE)
512 #if defined(CONFIG_USER_ONLY)
513 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
514 {
515 tb_invalidate_phys_page_range(pc, pc + 1, 0);
516 }
517 #else
518 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
519 {
520 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
521 if (phys != -1) {
522 tb_invalidate_phys_addr(cpu->as,
523 phys | (pc & ~TARGET_PAGE_MASK));
524 }
525 }
526 #endif
527 #endif /* TARGET_HAS_ICE */
528
529 #if defined(CONFIG_USER_ONLY)
530 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
531
532 {
533 }
534
535 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
536 int flags, CPUWatchpoint **watchpoint)
537 {
538 return -ENOSYS;
539 }
540 #else
541 /* Add a watchpoint. */
542 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
543 int flags, CPUWatchpoint **watchpoint)
544 {
545 vaddr len_mask = ~(len - 1);
546 CPUWatchpoint *wp;
547
548 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
549 if ((len & (len - 1)) || (addr & ~len_mask) ||
550 len == 0 || len > TARGET_PAGE_SIZE) {
551 error_report("tried to set invalid watchpoint at %"
552 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
553 return -EINVAL;
554 }
555 wp = g_malloc(sizeof(*wp));
556
557 wp->vaddr = addr;
558 wp->len_mask = len_mask;
559 wp->flags = flags;
560
561 /* keep all GDB-injected watchpoints in front */
562 if (flags & BP_GDB) {
563 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
564 } else {
565 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
566 }
567
568 tlb_flush_page(cpu, addr);
569
570 if (watchpoint)
571 *watchpoint = wp;
572 return 0;
573 }
574
575 /* Remove a specific watchpoint. */
576 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
577 int flags)
578 {
579 vaddr len_mask = ~(len - 1);
580 CPUWatchpoint *wp;
581
582 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
583 if (addr == wp->vaddr && len_mask == wp->len_mask
584 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
585 cpu_watchpoint_remove_by_ref(cpu, wp);
586 return 0;
587 }
588 }
589 return -ENOENT;
590 }
591
592 /* Remove a specific watchpoint by reference. */
593 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
594 {
595 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
596
597 tlb_flush_page(cpu, watchpoint->vaddr);
598
599 g_free(watchpoint);
600 }
601
602 /* Remove all matching watchpoints. */
603 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
604 {
605 CPUWatchpoint *wp, *next;
606
607 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
608 if (wp->flags & mask) {
609 cpu_watchpoint_remove_by_ref(cpu, wp);
610 }
611 }
612 }
613 #endif
614
615 /* Add a breakpoint. */
616 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
617 CPUBreakpoint **breakpoint)
618 {
619 #if defined(TARGET_HAS_ICE)
620 CPUBreakpoint *bp;
621
622 bp = g_malloc(sizeof(*bp));
623
624 bp->pc = pc;
625 bp->flags = flags;
626
627 /* keep all GDB-injected breakpoints in front */
628 if (flags & BP_GDB) {
629 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
630 } else {
631 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
632 }
633
634 breakpoint_invalidate(cpu, pc);
635
636 if (breakpoint) {
637 *breakpoint = bp;
638 }
639 return 0;
640 #else
641 return -ENOSYS;
642 #endif
643 }
644
645 /* Remove a specific breakpoint. */
646 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
647 {
648 #if defined(TARGET_HAS_ICE)
649 CPUBreakpoint *bp;
650
651 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
652 if (bp->pc == pc && bp->flags == flags) {
653 cpu_breakpoint_remove_by_ref(cpu, bp);
654 return 0;
655 }
656 }
657 return -ENOENT;
658 #else
659 return -ENOSYS;
660 #endif
661 }
662
663 /* Remove a specific breakpoint by reference. */
664 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
665 {
666 #if defined(TARGET_HAS_ICE)
667 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
668
669 breakpoint_invalidate(cpu, breakpoint->pc);
670
671 g_free(breakpoint);
672 #endif
673 }
674
675 /* Remove all matching breakpoints. */
676 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
677 {
678 #if defined(TARGET_HAS_ICE)
679 CPUBreakpoint *bp, *next;
680
681 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
682 if (bp->flags & mask) {
683 cpu_breakpoint_remove_by_ref(cpu, bp);
684 }
685 }
686 #endif
687 }
688
689 /* Enable or disable single-step mode. EXCP_DEBUG is returned by the
690 CPU loop after each instruction. */
691 void cpu_single_step(CPUState *cpu, int enabled)
692 {
693 #if defined(TARGET_HAS_ICE)
694 if (cpu->singlestep_enabled != enabled) {
695 cpu->singlestep_enabled = enabled;
696 if (kvm_enabled()) {
697 kvm_update_guest_debug(cpu, 0);
698 } else {
699 /* must flush all the translated code to avoid inconsistencies */
700 /* XXX: only flush what is necessary */
701 CPUArchState *env = cpu->env_ptr;
702 tb_flush(env);
703 }
704 }
705 #endif
706 }
707
708 void cpu_abort(CPUState *cpu, const char *fmt, ...)
709 {
710 va_list ap;
711 va_list ap2;
712
713 va_start(ap, fmt);
714 va_copy(ap2, ap);
715 fprintf(stderr, "qemu: fatal: ");
716 vfprintf(stderr, fmt, ap);
717 fprintf(stderr, "\n");
718 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
719 if (qemu_log_enabled()) {
720 qemu_log("qemu: fatal: ");
721 qemu_log_vprintf(fmt, ap2);
722 qemu_log("\n");
723 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 qemu_log_flush();
725 qemu_log_close();
726 }
727 va_end(ap2);
728 va_end(ap);
729 #if defined(CONFIG_USER_ONLY)
730 {
731 struct sigaction act;
732 sigfillset(&act.sa_mask);
733 act.sa_handler = SIG_DFL;
734 sigaction(SIGABRT, &act, NULL);
735 }
736 #endif
737 abort();
738 }
739
740 #if !defined(CONFIG_USER_ONLY)
741 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
742 {
743 RAMBlock *block;
744
745 /* The list is protected by the iothread lock here. */
746 block = ram_list.mru_block;
747 if (block && addr - block->offset < block->length) {
748 goto found;
749 }
750 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
751 if (addr - block->offset < block->length) {
752 goto found;
753 }
754 }
755
756 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
757 abort();
758
759 found:
760 ram_list.mru_block = block;
761 return block;
762 }
763
764 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
765 {
766 ram_addr_t start1;
767 RAMBlock *block;
768 ram_addr_t end;
769
770 end = TARGET_PAGE_ALIGN(start + length);
771 start &= TARGET_PAGE_MASK;
772
773 block = qemu_get_ram_block(start);
774 assert(block == qemu_get_ram_block(end - 1));
775 start1 = (uintptr_t)block->host + (start - block->offset);
776 cpu_tlb_reset_dirty_all(start1, length);
777 }
778
779 /* Note: start and end must be within the same ram block. */
780 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
781 unsigned client)
782 {
783 if (length == 0)
784 return;
785 cpu_physical_memory_clear_dirty_range(start, length, client);
786
787 if (tcg_enabled()) {
788 tlb_reset_dirty_range_all(start, length);
789 }
790 }
791
792 static void cpu_physical_memory_set_dirty_tracking(bool enable)
793 {
794 in_migration = enable;
795 }
796
797 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
798 MemoryRegionSection *section,
799 target_ulong vaddr,
800 hwaddr paddr, hwaddr xlat,
801 int prot,
802 target_ulong *address)
803 {
804 hwaddr iotlb;
805 CPUWatchpoint *wp;
806
807 if (memory_region_is_ram(section->mr)) {
808 /* Normal RAM. */
809 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
810 + xlat;
811 if (!section->readonly) {
812 iotlb |= PHYS_SECTION_NOTDIRTY;
813 } else {
814 iotlb |= PHYS_SECTION_ROM;
815 }
816 } else {
817 iotlb = section - section->address_space->dispatch->map.sections;
818 iotlb += xlat;
819 }
820
821 /* Make accesses to pages with watchpoints go via the
822 watchpoint trap routines. */
823 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
824 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
825 /* Avoid trapping reads of pages with a write breakpoint. */
826 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
827 iotlb = PHYS_SECTION_WATCH + paddr;
828 *address |= TLB_MMIO;
829 break;
830 }
831 }
832 }
833
834 return iotlb;
835 }
836 #endif /* !defined(CONFIG_USER_ONLY) */
837
838 #if !defined(CONFIG_USER_ONLY)
839
840 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
841 uint16_t section);
842 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
843
844 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
845
846 /*
847 * Set a custom physical guest memory allocator.
848 * Accelerators with unusual needs may need this. Hopefully, we can
849 * get rid of it eventually.
850 */
851 void phys_mem_set_alloc(void *(*alloc)(size_t))
852 {
853 phys_mem_alloc = alloc;
854 }
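/*
 * Illustrative sketch (not part of exec.c): how an accelerator might install
 * a custom guest memory allocator via phys_mem_set_alloc().  The function
 * names below are hypothetical; the wrapper simply falls back to
 * qemu_anon_ram_alloc(), which is also the default hook above.
 */
#if 0 /* example only */
static void *example_accel_ram_alloc(size_t size)
{
    /* A real accelerator would return whatever backing memory it needs;
     * this sketch just reuses the default anonymous allocation.
     */
    return qemu_anon_ram_alloc(size);
}

static void example_accel_init(void)
{
    /* Must run before any RAM block is allocated for the hook to matter. */
    phys_mem_set_alloc(example_accel_ram_alloc);
}
#endif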
855
856 static uint16_t phys_section_add(PhysPageMap *map,
857 MemoryRegionSection *section)
858 {
859 /* The physical section number is ORed with a page-aligned
860 * pointer to produce the iotlb entries. Thus it should
861 * never overflow into the page-aligned value.
862 */
863 assert(map->sections_nb < TARGET_PAGE_SIZE);
864
865 if (map->sections_nb == map->sections_nb_alloc) {
866 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
867 map->sections = g_renew(MemoryRegionSection, map->sections,
868 map->sections_nb_alloc);
869 }
870 map->sections[map->sections_nb] = *section;
871 memory_region_ref(section->mr);
872 return map->sections_nb++;
873 }
874
875 static void phys_section_destroy(MemoryRegion *mr)
876 {
877 memory_region_unref(mr);
878
879 if (mr->subpage) {
880 subpage_t *subpage = container_of(mr, subpage_t, iomem);
881 memory_region_destroy(&subpage->iomem);
882 g_free(subpage);
883 }
884 }
885
886 static void phys_sections_free(PhysPageMap *map)
887 {
888 while (map->sections_nb > 0) {
889 MemoryRegionSection *section = &map->sections[--map->sections_nb];
890 phys_section_destroy(section->mr);
891 }
892 g_free(map->sections);
893 g_free(map->nodes);
894 }
895
896 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
897 {
898 subpage_t *subpage;
899 hwaddr base = section->offset_within_address_space
900 & TARGET_PAGE_MASK;
901 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
902 d->map.nodes, d->map.sections);
903 MemoryRegionSection subsection = {
904 .offset_within_address_space = base,
905 .size = int128_make64(TARGET_PAGE_SIZE),
906 };
907 hwaddr start, end;
908
909 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
910
911 if (!(existing->mr->subpage)) {
912 subpage = subpage_init(d->as, base);
913 subsection.address_space = d->as;
914 subsection.mr = &subpage->iomem;
915 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
916 phys_section_add(&d->map, &subsection));
917 } else {
918 subpage = container_of(existing->mr, subpage_t, iomem);
919 }
920 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
921 end = start + int128_get64(section->size) - 1;
922 subpage_register(subpage, start, end,
923 phys_section_add(&d->map, section));
924 }
925
926
927 static void register_multipage(AddressSpaceDispatch *d,
928 MemoryRegionSection *section)
929 {
930 hwaddr start_addr = section->offset_within_address_space;
931 uint16_t section_index = phys_section_add(&d->map, section);
932 uint64_t num_pages = int128_get64(int128_rshift(section->size,
933 TARGET_PAGE_BITS));
934
935 assert(num_pages);
936 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
937 }
938
939 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
940 {
941 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
942 AddressSpaceDispatch *d = as->next_dispatch;
943 MemoryRegionSection now = *section, remain = *section;
944 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
945
946 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
947 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
948 - now.offset_within_address_space;
949
950 now.size = int128_min(int128_make64(left), now.size);
951 register_subpage(d, &now);
952 } else {
953 now.size = int128_zero();
954 }
955 while (int128_ne(remain.size, now.size)) {
956 remain.size = int128_sub(remain.size, now.size);
957 remain.offset_within_address_space += int128_get64(now.size);
958 remain.offset_within_region += int128_get64(now.size);
959 now = remain;
960 if (int128_lt(remain.size, page_size)) {
961 register_subpage(d, &now);
962 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
963 now.size = page_size;
964 register_subpage(d, &now);
965 } else {
966 now.size = int128_and(now.size, int128_neg(page_size));
967 register_multipage(d, &now);
968 }
969 }
970 }
971
972 void qemu_flush_coalesced_mmio_buffer(void)
973 {
974 if (kvm_enabled())
975 kvm_flush_coalesced_mmio_buffer();
976 }
977
978 void qemu_mutex_lock_ramlist(void)
979 {
980 qemu_mutex_lock(&ram_list.mutex);
981 }
982
983 void qemu_mutex_unlock_ramlist(void)
984 {
985 qemu_mutex_unlock(&ram_list.mutex);
986 }
987
988 #ifdef __linux__
989
990 #include <sys/vfs.h>
991
992 #define HUGETLBFS_MAGIC 0x958458f6
993
994 static long gethugepagesize(const char *path)
995 {
996 struct statfs fs;
997 int ret;
998
999 do {
1000 ret = statfs(path, &fs);
1001 } while (ret != 0 && errno == EINTR);
1002
1003 if (ret != 0) {
1004 perror(path);
1005 return 0;
1006 }
1007
1008 if (fs.f_type != HUGETLBFS_MAGIC)
1009 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1010
1011 return fs.f_bsize;
1012 }
1013
1014 static sigjmp_buf sigjump;
1015
1016 static void sigbus_handler(int signal)
1017 {
1018 siglongjmp(sigjump, 1);
1019 }
1020
1021 static void *file_ram_alloc(RAMBlock *block,
1022 ram_addr_t memory,
1023 const char *path)
1024 {
1025 char *filename;
1026 char *sanitized_name;
1027 char *c;
1028 void *area;
1029 int fd;
1030 unsigned long hpagesize;
1031
1032 hpagesize = gethugepagesize(path);
1033 if (!hpagesize) {
1034 goto error;
1035 }
1036
1037 if (memory < hpagesize) {
1038 return NULL;
1039 }
1040
1041 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1042 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1043 goto error;
1044 }
1045
1046 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1047 sanitized_name = g_strdup(block->mr->name);
1048 for (c = sanitized_name; *c != '\0'; c++) {
1049 if (*c == '/')
1050 *c = '_';
1051 }
1052
1053 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1054 sanitized_name);
1055 g_free(sanitized_name);
1056
1057 fd = mkstemp(filename);
1058 if (fd < 0) {
1059 perror("unable to create backing store for hugepages");
1060 g_free(filename);
1061 goto error;
1062 }
1063 unlink(filename);
1064 g_free(filename);
1065
1066 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1067
1068 /*
1069 * ftruncate is not supported by hugetlbfs on older
1070 * hosts, so don't bother bailing out on errors.
1071 * If anything goes wrong with it under other filesystems,
1072 * mmap will fail.
1073 */
1074 if (ftruncate(fd, memory))
1075 perror("ftruncate");
1076
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 perror("file_ram_alloc: can't mmap RAM pages");
1080 close(fd);
1081 goto error;
1082 }
1083
1084 if (mem_prealloc) {
1085 int ret, i;
1086 struct sigaction act, oldact;
1087 sigset_t set, oldset;
1088
1089 memset(&act, 0, sizeof(act));
1090 act.sa_handler = &sigbus_handler;
1091 act.sa_flags = 0;
1092
1093 ret = sigaction(SIGBUS, &act, &oldact);
1094 if (ret) {
1095 perror("file_ram_alloc: failed to install signal handler");
1096 exit(1);
1097 }
1098
1099 /* unblock SIGBUS */
1100 sigemptyset(&set);
1101 sigaddset(&set, SIGBUS);
1102 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1103
1104 if (sigsetjmp(sigjump, 1)) {
1105 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1106 exit(1);
1107 }
1108
1109 /* MAP_POPULATE silently ignores failures */
1110 for (i = 0; i < (memory/hpagesize); i++) {
1111 memset(area + (hpagesize*i), 0, 1);
1112 }
1113
1114 ret = sigaction(SIGBUS, &oldact, NULL);
1115 if (ret) {
1116 perror("file_ram_alloc: failed to reinstall signal handler");
1117 exit(1);
1118 }
1119
1120 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1121 }
1122
1123 block->fd = fd;
1124 return area;
1125
1126 error:
1127 if (mem_prealloc) {
1128 exit(1);
1129 }
1130 return NULL;
1131 }
1132 #else
1133 static void *file_ram_alloc(RAMBlock *block,
1134 ram_addr_t memory,
1135 const char *path)
1136 {
1137 fprintf(stderr, "-mem-path not supported on this host\n");
1138 exit(1);
1139 }
1140 #endif
1141
1142 static ram_addr_t find_ram_offset(ram_addr_t size)
1143 {
1144 RAMBlock *block, *next_block;
1145 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1146
1147 assert(size != 0); /* it would hand out the same offset multiple times */
1148
1149 if (QTAILQ_EMPTY(&ram_list.blocks))
1150 return 0;
1151
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 ram_addr_t end, next = RAM_ADDR_MAX;
1154
1155 end = block->offset + block->length;
1156
1157 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1158 if (next_block->offset >= end) {
1159 next = MIN(next, next_block->offset);
1160 }
1161 }
1162 if (next - end >= size && next - end < mingap) {
1163 offset = end;
1164 mingap = next - end;
1165 }
1166 }
1167
1168 if (offset == RAM_ADDR_MAX) {
1169 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1170 (uint64_t)size);
1171 abort();
1172 }
1173
1174 return offset;
1175 }
1176
1177 ram_addr_t last_ram_offset(void)
1178 {
1179 RAMBlock *block;
1180 ram_addr_t last = 0;
1181
1182 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1183 last = MAX(last, block->offset + block->length);
1184
1185 return last;
1186 }
1187
1188 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1189 {
1190 int ret;
1191
1192 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump. */
1193 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1194 "dump-guest-core", true)) {
1195 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1196 if (ret) {
1197 perror("qemu_madvise");
1198 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1199 "but dump_guest_core=off specified\n");
1200 }
1201 }
1202 }
1203
1204 static RAMBlock *find_ram_block(ram_addr_t addr)
1205 {
1206 RAMBlock *block;
1207
1208 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1209 if (block->offset == addr) {
1210 return block;
1211 }
1212 }
1213
1214 return NULL;
1215 }
1216
1217 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1218 {
1219 RAMBlock *new_block = find_ram_block(addr);
1220 RAMBlock *block;
1221
1222 assert(new_block);
1223 assert(!new_block->idstr[0]);
1224
1225 if (dev) {
1226 char *id = qdev_get_dev_path(dev);
1227 if (id) {
1228 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1229 g_free(id);
1230 }
1231 }
1232 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1233
1234 /* This assumes the iothread lock is taken here too. */
1235 qemu_mutex_lock_ramlist();
1236 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1237 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1238 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1239 new_block->idstr);
1240 abort();
1241 }
1242 }
1243 qemu_mutex_unlock_ramlist();
1244 }
1245
1246 void qemu_ram_unset_idstr(ram_addr_t addr)
1247 {
1248 RAMBlock *block = find_ram_block(addr);
1249
1250 if (block) {
1251 memset(block->idstr, 0, sizeof(block->idstr));
1252 }
1253 }
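/*
 * Illustrative sketch (not part of exec.c): pairing qemu_ram_set_idstr()
 * with the new qemu_ram_unset_idstr().  The function name, block size and
 * id string below are invented for the example; a real caller (e.g. the
 * memory API) would do this when a RAM region is created and later torn
 * down or re-registered.
 */
#if 0 /* example only */
static void example_idstr_lifecycle(MemoryRegion *mr, DeviceState *dev)
{
    ram_addr_t offset = qemu_ram_alloc(4096, mr);

    /* Give the block a stable id string, used e.g. by RAM migration. */
    qemu_ram_set_idstr(offset, "example.ram", dev);

    /* ... the block is in use ... */

    /* Clear the id string so the same name can be registered again later. */
    qemu_ram_unset_idstr(offset);
    qemu_ram_free(offset);
}
#endif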
1254
1255 static int memory_try_enable_merging(void *addr, size_t len)
1256 {
1257 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1258 /* disabled by the user */
1259 return 0;
1260 }
1261
1262 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1263 }
1264
1265 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1266 MemoryRegion *mr)
1267 {
1268 RAMBlock *block, *new_block;
1269 ram_addr_t old_ram_size, new_ram_size;
1270
1271 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1272
1273 size = TARGET_PAGE_ALIGN(size);
1274 new_block = g_malloc0(sizeof(*new_block));
1275 new_block->fd = -1;
1276
1277 /* This assumes the iothread lock is taken here too. */
1278 qemu_mutex_lock_ramlist();
1279 new_block->mr = mr;
1280 new_block->offset = find_ram_offset(size);
1281 if (host) {
1282 new_block->host = host;
1283 new_block->flags |= RAM_PREALLOC_MASK;
1284 } else if (xen_enabled()) {
1285 if (mem_path) {
1286 fprintf(stderr, "-mem-path not supported with Xen\n");
1287 exit(1);
1288 }
1289 xen_ram_alloc(new_block->offset, size, mr);
1290 } else {
1291 if (mem_path) {
1292 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1293 /*
1294 * file_ram_alloc() needs to allocate just like
1295 * phys_mem_alloc, but we haven't bothered to provide
1296 * a hook there.
1297 */
1298 fprintf(stderr,
1299 "-mem-path not supported with this accelerator\n");
1300 exit(1);
1301 }
1302 new_block->host = file_ram_alloc(new_block, size, mem_path);
1303 }
1304 if (!new_block->host) {
1305 new_block->host = phys_mem_alloc(size);
1306 if (!new_block->host) {
1307 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1308 new_block->mr->name, strerror(errno));
1309 exit(1);
1310 }
1311 memory_try_enable_merging(new_block->host, size);
1312 }
1313 }
1314 new_block->length = size;
1315
1316 /* Keep the list sorted from biggest to smallest block. */
1317 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1318 if (block->length < new_block->length) {
1319 break;
1320 }
1321 }
1322 if (block) {
1323 QTAILQ_INSERT_BEFORE(block, new_block, next);
1324 } else {
1325 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1326 }
1327 ram_list.mru_block = NULL;
1328
1329 ram_list.version++;
1330 qemu_mutex_unlock_ramlist();
1331
1332 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1333
1334 if (new_ram_size > old_ram_size) {
1335 int i;
1336 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1337 ram_list.dirty_memory[i] =
1338 bitmap_zero_extend(ram_list.dirty_memory[i],
1339 old_ram_size, new_ram_size);
1340 }
1341 }
1342 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1343
1344 qemu_ram_setup_dump(new_block->host, size);
1345 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1346 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1347
1348 if (kvm_enabled())
1349 kvm_setup_guest_memory(new_block->host, size);
1350
1351 return new_block->offset;
1352 }
1353
1354 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1355 {
1356 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1357 }
1358
1359 void qemu_ram_free_from_ptr(ram_addr_t addr)
1360 {
1361 RAMBlock *block;
1362
1363 /* This assumes the iothread lock is taken here too. */
1364 qemu_mutex_lock_ramlist();
1365 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1366 if (addr == block->offset) {
1367 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1368 ram_list.mru_block = NULL;
1369 ram_list.version++;
1370 g_free(block);
1371 break;
1372 }
1373 }
1374 qemu_mutex_unlock_ramlist();
1375 }
1376
1377 void qemu_ram_free(ram_addr_t addr)
1378 {
1379 RAMBlock *block;
1380
1381 /* This assumes the iothread lock is taken here too. */
1382 qemu_mutex_lock_ramlist();
1383 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1384 if (addr == block->offset) {
1385 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1386 ram_list.mru_block = NULL;
1387 ram_list.version++;
1388 if (block->flags & RAM_PREALLOC_MASK) {
1389 ;
1390 } else if (xen_enabled()) {
1391 xen_invalidate_map_cache_entry(block->host);
1392 #ifndef _WIN32
1393 } else if (block->fd >= 0) {
1394 munmap(block->host, block->length);
1395 close(block->fd);
1396 #endif
1397 } else {
1398 qemu_anon_ram_free(block->host, block->length);
1399 }
1400 g_free(block);
1401 break;
1402 }
1403 }
1404 qemu_mutex_unlock_ramlist();
1405
1406 }
1407
1408 #ifndef _WIN32
1409 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1410 {
1411 RAMBlock *block;
1412 ram_addr_t offset;
1413 int flags;
1414 void *area, *vaddr;
1415
1416 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1417 offset = addr - block->offset;
1418 if (offset < block->length) {
1419 vaddr = block->host + offset;
1420 if (block->flags & RAM_PREALLOC_MASK) {
1421 ;
1422 } else if (xen_enabled()) {
1423 abort();
1424 } else {
1425 flags = MAP_FIXED;
1426 munmap(vaddr, length);
1427 if (block->fd >= 0) {
1428 #ifdef MAP_POPULATE
1429 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1430 MAP_PRIVATE;
1431 #else
1432 flags |= MAP_PRIVATE;
1433 #endif
1434 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1435 flags, block->fd, offset);
1436 } else {
1437 /*
1438 * Remap needs to match alloc. Accelerators that
1439 * set phys_mem_alloc never remap. If they did,
1440 * we'd need a remap hook here.
1441 */
1442 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1443
1444 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1445 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1446 flags, -1, 0);
1447 }
1448 if (area != vaddr) {
1449 fprintf(stderr, "Could not remap addr: "
1450 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1451 length, addr);
1452 exit(1);
1453 }
1454 memory_try_enable_merging(vaddr, length);
1455 qemu_ram_setup_dump(vaddr, length);
1456 }
1457 return;
1458 }
1459 }
1460 }
1461 #endif /* !_WIN32 */
1462
1463 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1464 With the exception of the softmmu code in this file, this should
1465 only be used for local memory (e.g. video ram) that the device owns,
1466 and knows it isn't going to access beyond the end of the block.
1467
1468 It should not be used for general purpose DMA.
1469 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1470 */
1471 void *qemu_get_ram_ptr(ram_addr_t addr)
1472 {
1473 RAMBlock *block = qemu_get_ram_block(addr);
1474
1475 if (xen_enabled()) {
1476 /* We need to check whether the requested address is in RAM
1477 * because we don't want to map the entire guest memory in QEMU.
1478 * In that case, just map up to the end of the page.
1479 */
1480 if (block->offset == 0) {
1481 return xen_map_cache(addr, 0, 0);
1482 } else if (block->host == NULL) {
1483 block->host =
1484 xen_map_cache(block->offset, block->length, 1);
1485 }
1486 }
1487 return block->host + (addr - block->offset);
1488 }
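/*
 * Illustrative sketch (not part of exec.c): the comment above recommends
 * cpu_physical_memory_rw() over qemu_get_ram_ptr() for anything resembling
 * general-purpose DMA.  The helper names below are hypothetical and only
 * contrast the two interfaces.
 */
#if 0 /* example only */
static void example_dma_read(hwaddr guest_addr, uint8_t *dst, int len)
{
    /* Generic guest memory: let the memory API handle MMIO, watchpoints
     * and dirty tracking instead of dereferencing RAM directly.
     */
    cpu_physical_memory_rw(guest_addr, dst, len, 0);
}

static uint8_t *example_local_vram_ptr(ram_addr_t vram_offset)
{
    /* Device-owned RAM (e.g. video RAM) allocated by the device itself:
     * a direct host pointer from qemu_get_ram_ptr() is acceptable here.
     */
    return qemu_get_ram_ptr(vram_offset);
}
#endif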
1489
1490 /* Return a host pointer to the guest's RAM. Similar to qemu_get_ram_ptr,
1491 * but takes a size argument. */
1492 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1493 {
1494 if (*size == 0) {
1495 return NULL;
1496 }
1497 if (xen_enabled()) {
1498 return xen_map_cache(addr, *size, 1);
1499 } else {
1500 RAMBlock *block;
1501
1502 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1503 if (addr - block->offset < block->length) {
1504 if (addr - block->offset + *size > block->length)
1505 *size = block->length - addr + block->offset;
1506 return block->host + (addr - block->offset);
1507 }
1508 }
1509
1510 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1511 abort();
1512 }
1513 }
1514
1515 /* Some of the softmmu routines need to translate from a host pointer
1516 (typically a TLB entry) back to a ram offset. */
1517 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1518 {
1519 RAMBlock *block;
1520 uint8_t *host = ptr;
1521
1522 if (xen_enabled()) {
1523 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1524 return qemu_get_ram_block(*ram_addr)->mr;
1525 }
1526
1527 block = ram_list.mru_block;
1528 if (block && block->host && host - block->host < block->length) {
1529 goto found;
1530 }
1531
1532 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1533 /* This case can happen when the block is not mapped. */
1534 if (block->host == NULL) {
1535 continue;
1536 }
1537 if (host - block->host < block->length) {
1538 goto found;
1539 }
1540 }
1541
1542 return NULL;
1543
1544 found:
1545 *ram_addr = block->offset + (host - block->host);
1546 return block->mr;
1547 }
1548
1549 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1550 uint64_t val, unsigned size)
1551 {
1552 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1553 tb_invalidate_phys_page_fast(ram_addr, size);
1554 }
1555 switch (size) {
1556 case 1:
1557 stb_p(qemu_get_ram_ptr(ram_addr), val);
1558 break;
1559 case 2:
1560 stw_p(qemu_get_ram_ptr(ram_addr), val);
1561 break;
1562 case 4:
1563 stl_p(qemu_get_ram_ptr(ram_addr), val);
1564 break;
1565 default:
1566 abort();
1567 }
1568 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1569 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1570 /* we remove the notdirty callback only if the code has been
1571 flushed */
1572 if (!cpu_physical_memory_is_clean(ram_addr)) {
1573 CPUArchState *env = current_cpu->env_ptr;
1574 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1575 }
1576 }
1577
1578 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1579 unsigned size, bool is_write)
1580 {
1581 return is_write;
1582 }
1583
1584 static const MemoryRegionOps notdirty_mem_ops = {
1585 .write = notdirty_mem_write,
1586 .valid.accepts = notdirty_mem_accepts,
1587 .endianness = DEVICE_NATIVE_ENDIAN,
1588 };
1589
1590 /* Generate a debug exception if a watchpoint has been hit. */
1591 static void check_watchpoint(int offset, int len_mask, int flags)
1592 {
1593 CPUState *cpu = current_cpu;
1594 CPUArchState *env = cpu->env_ptr;
1595 target_ulong pc, cs_base;
1596 target_ulong vaddr;
1597 CPUWatchpoint *wp;
1598 int cpu_flags;
1599
1600 if (cpu->watchpoint_hit) {
1601 /* We re-entered the check after replacing the TB. Now raise
1602 * the debug interrupt so that it will trigger after the
1603 * current instruction. */
1604 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1605 return;
1606 }
1607 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1608 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1609 if ((vaddr == (wp->vaddr & len_mask) ||
1610 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1611 wp->flags |= BP_WATCHPOINT_HIT;
1612 if (!cpu->watchpoint_hit) {
1613 cpu->watchpoint_hit = wp;
1614 tb_check_watchpoint(cpu);
1615 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1616 cpu->exception_index = EXCP_DEBUG;
1617 cpu_loop_exit(cpu);
1618 } else {
1619 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1620 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1621 cpu_resume_from_signal(cpu, NULL);
1622 }
1623 }
1624 } else {
1625 wp->flags &= ~BP_WATCHPOINT_HIT;
1626 }
1627 }
1628 }
1629
1630 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1631 so these check for a hit then pass through to the normal out-of-line
1632 phys routines. */
1633 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1634 unsigned size)
1635 {
1636 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1637 switch (size) {
1638 case 1: return ldub_phys(&address_space_memory, addr);
1639 case 2: return lduw_phys(&address_space_memory, addr);
1640 case 4: return ldl_phys(&address_space_memory, addr);
1641 default: abort();
1642 }
1643 }
1644
1645 static void watch_mem_write(void *opaque, hwaddr addr,
1646 uint64_t val, unsigned size)
1647 {
1648 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1649 switch (size) {
1650 case 1:
1651 stb_phys(&address_space_memory, addr, val);
1652 break;
1653 case 2:
1654 stw_phys(&address_space_memory, addr, val);
1655 break;
1656 case 4:
1657 stl_phys(&address_space_memory, addr, val);
1658 break;
1659 default: abort();
1660 }
1661 }
1662
1663 static const MemoryRegionOps watch_mem_ops = {
1664 .read = watch_mem_read,
1665 .write = watch_mem_write,
1666 .endianness = DEVICE_NATIVE_ENDIAN,
1667 };
1668
1669 static uint64_t subpage_read(void *opaque, hwaddr addr,
1670 unsigned len)
1671 {
1672 subpage_t *subpage = opaque;
1673 uint8_t buf[4];
1674
1675 #if defined(DEBUG_SUBPAGE)
1676 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1677 subpage, len, addr);
1678 #endif
1679 address_space_read(subpage->as, addr + subpage->base, buf, len);
1680 switch (len) {
1681 case 1:
1682 return ldub_p(buf);
1683 case 2:
1684 return lduw_p(buf);
1685 case 4:
1686 return ldl_p(buf);
1687 default:
1688 abort();
1689 }
1690 }
1691
1692 static void subpage_write(void *opaque, hwaddr addr,
1693 uint64_t value, unsigned len)
1694 {
1695 subpage_t *subpage = opaque;
1696 uint8_t buf[4];
1697
1698 #if defined(DEBUG_SUBPAGE)
1699 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1700 " value %"PRIx64"\n",
1701 __func__, subpage, len, addr, value);
1702 #endif
1703 switch (len) {
1704 case 1:
1705 stb_p(buf, value);
1706 break;
1707 case 2:
1708 stw_p(buf, value);
1709 break;
1710 case 4:
1711 stl_p(buf, value);
1712 break;
1713 default:
1714 abort();
1715 }
1716 address_space_write(subpage->as, addr + subpage->base, buf, len);
1717 }
1718
1719 static bool subpage_accepts(void *opaque, hwaddr addr,
1720 unsigned len, bool is_write)
1721 {
1722 subpage_t *subpage = opaque;
1723 #if defined(DEBUG_SUBPAGE)
1724 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1725 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1726 #endif
1727
1728 return address_space_access_valid(subpage->as, addr + subpage->base,
1729 len, is_write);
1730 }
1731
1732 static const MemoryRegionOps subpage_ops = {
1733 .read = subpage_read,
1734 .write = subpage_write,
1735 .valid.accepts = subpage_accepts,
1736 .endianness = DEVICE_NATIVE_ENDIAN,
1737 };
1738
1739 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1740 uint16_t section)
1741 {
1742 int idx, eidx;
1743
1744 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1745 return -1;
1746 idx = SUBPAGE_IDX(start);
1747 eidx = SUBPAGE_IDX(end);
1748 #if defined(DEBUG_SUBPAGE)
1749 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1750 __func__, mmio, start, end, idx, eidx, section);
1751 #endif
1752 for (; idx <= eidx; idx++) {
1753 mmio->sub_section[idx] = section;
1754 }
1755
1756 return 0;
1757 }
1758
1759 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1760 {
1761 subpage_t *mmio;
1762
1763 mmio = g_malloc0(sizeof(subpage_t));
1764
1765 mmio->as = as;
1766 mmio->base = base;
1767 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1768 "subpage", TARGET_PAGE_SIZE);
1769 mmio->iomem.subpage = true;
1770 #if defined(DEBUG_SUBPAGE)
1771 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1772 mmio, base, TARGET_PAGE_SIZE);
1773 #endif
1774 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1775
1776 return mmio;
1777 }
1778
1779 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1780 {
1781 MemoryRegionSection section = {
1782 .address_space = &address_space_memory,
1783 .mr = mr,
1784 .offset_within_address_space = 0,
1785 .offset_within_region = 0,
1786 .size = int128_2_64(),
1787 };
1788
1789 return phys_section_add(map, &section);
1790 }
1791
1792 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1793 {
1794 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1795 }
1796
1797 static void io_mem_init(void)
1798 {
1799 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1800 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1801 "unassigned", UINT64_MAX);
1802 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1803 "notdirty", UINT64_MAX);
1804 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1805 "watch", UINT64_MAX);
1806 }
1807
1808 static void mem_begin(MemoryListener *listener)
1809 {
1810 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1811 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1812 uint16_t n;
1813
1814 n = dummy_section(&d->map, &io_mem_unassigned);
1815 assert(n == PHYS_SECTION_UNASSIGNED);
1816 n = dummy_section(&d->map, &io_mem_notdirty);
1817 assert(n == PHYS_SECTION_NOTDIRTY);
1818 n = dummy_section(&d->map, &io_mem_rom);
1819 assert(n == PHYS_SECTION_ROM);
1820 n = dummy_section(&d->map, &io_mem_watch);
1821 assert(n == PHYS_SECTION_WATCH);
1822
1823 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1824 d->as = as;
1825 as->next_dispatch = d;
1826 }
1827
1828 static void mem_commit(MemoryListener *listener)
1829 {
1830 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1831 AddressSpaceDispatch *cur = as->dispatch;
1832 AddressSpaceDispatch *next = as->next_dispatch;
1833
1834 phys_page_compact_all(next, next->map.nodes_nb);
1835
1836 as->dispatch = next;
1837
1838 if (cur) {
1839 phys_sections_free(&cur->map);
1840 g_free(cur);
1841 }
1842 }
1843
1844 static void tcg_commit(MemoryListener *listener)
1845 {
1846 CPUState *cpu;
1847
1848 /* since each CPU stores ram addresses in its TLB cache, we must
1849 reset the modified entries */
1850 /* XXX: slow ! */
1851 CPU_FOREACH(cpu) {
1852 /* FIXME: Disentangle the circular cpu.h file dependencies so we can
1853 get the right CPU directly from the listener. */
1854 if (cpu->tcg_as_listener != listener) {
1855 continue;
1856 }
1857 tlb_flush(cpu, 1);
1858 }
1859 }
1860
1861 static void core_log_global_start(MemoryListener *listener)
1862 {
1863 cpu_physical_memory_set_dirty_tracking(true);
1864 }
1865
1866 static void core_log_global_stop(MemoryListener *listener)
1867 {
1868 cpu_physical_memory_set_dirty_tracking(false);
1869 }
1870
1871 static MemoryListener core_memory_listener = {
1872 .log_global_start = core_log_global_start,
1873 .log_global_stop = core_log_global_stop,
1874 .priority = 1,
1875 };
1876
1877 void address_space_init_dispatch(AddressSpace *as)
1878 {
1879 as->dispatch = NULL;
1880 as->dispatch_listener = (MemoryListener) {
1881 .begin = mem_begin,
1882 .commit = mem_commit,
1883 .region_add = mem_add,
1884 .region_nop = mem_add,
1885 .priority = 0,
1886 };
1887 memory_listener_register(&as->dispatch_listener, as);
1888 }
1889
1890 void address_space_destroy_dispatch(AddressSpace *as)
1891 {
1892 AddressSpaceDispatch *d = as->dispatch;
1893
1894 memory_listener_unregister(&as->dispatch_listener);
1895 g_free(d);
1896 as->dispatch = NULL;
1897 }
1898
1899 static void memory_map_init(void)
1900 {
1901 system_memory = g_malloc(sizeof(*system_memory));
1902
1903 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1904 address_space_init(&address_space_memory, system_memory, "memory");
1905
1906 system_io = g_malloc(sizeof(*system_io));
1907 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1908 65536);
1909 address_space_init(&address_space_io, system_io, "I/O");
1910
1911 memory_listener_register(&core_memory_listener, &address_space_memory);
1912 }
1913
1914 MemoryRegion *get_system_memory(void)
1915 {
1916 return system_memory;
1917 }
1918
1919 MemoryRegion *get_system_io(void)
1920 {
1921 return system_io;
1922 }
1923
1924 #endif /* !defined(CONFIG_USER_ONLY) */
1925
1926 /* physical memory access (slow version, mainly for debug) */
1927 #if defined(CONFIG_USER_ONLY)
1928 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1929 uint8_t *buf, int len, int is_write)
1930 {
1931 int l, flags;
1932 target_ulong page;
1933 void * p;
1934
1935 while (len > 0) {
1936 page = addr & TARGET_PAGE_MASK;
1937 l = (page + TARGET_PAGE_SIZE) - addr;
1938 if (l > len)
1939 l = len;
1940 flags = page_get_flags(page);
1941 if (!(flags & PAGE_VALID))
1942 return -1;
1943 if (is_write) {
1944 if (!(flags & PAGE_WRITE))
1945 return -1;
1946 /* XXX: this code should not depend on lock_user */
1947 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1948 return -1;
1949 memcpy(p, buf, l);
1950 unlock_user(p, addr, l);
1951 } else {
1952 if (!(flags & PAGE_READ))
1953 return -1;
1954 /* XXX: this code should not depend on lock_user */
1955 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1956 return -1;
1957 memcpy(buf, p, l);
1958 unlock_user(p, addr, 0);
1959 }
1960 len -= l;
1961 buf += l;
1962 addr += l;
1963 }
1964 return 0;
1965 }
1966
1967 #else
1968
1969 static void invalidate_and_set_dirty(hwaddr addr,
1970 hwaddr length)
1971 {
1972 if (cpu_physical_memory_is_clean(addr)) {
1973 /* invalidate code */
1974 tb_invalidate_phys_page_range(addr, addr + length, 0);
1975 /* set dirty bit */
1976 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1977 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1978 }
1979 xen_modified_memory(addr, length);
1980 }
1981
1982 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1983 {
1984 unsigned access_size_max = mr->ops->valid.max_access_size;
1985
1986 /* Regions are assumed to support 1-4 byte accesses unless
1987 otherwise specified. */
1988 if (access_size_max == 0) {
1989 access_size_max = 4;
1990 }
1991
1992 /* Bound the maximum access by the alignment of the address. */
1993 if (!mr->ops->impl.unaligned) {
1994 unsigned align_size_max = addr & -addr;
1995 if (align_size_max != 0 && align_size_max < access_size_max) {
1996 access_size_max = align_size_max;
1997 }
1998 }
1999
2000 /* Don't attempt accesses larger than the maximum. */
2001 if (l > access_size_max) {
2002 l = access_size_max;
2003 }
2004 if (l & (l - 1)) {
2005 l = 1 << (qemu_fls(l) - 1);
2006 }
2007
2008 return l;
2009 }
2010
2011 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2012 int len, bool is_write)
2013 {
2014 hwaddr l;
2015 uint8_t *ptr;
2016 uint64_t val;
2017 hwaddr addr1;
2018 MemoryRegion *mr;
2019 bool error = false;
2020
2021 while (len > 0) {
2022 l = len;
2023 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2024
2025 if (is_write) {
2026 if (!memory_access_is_direct(mr, is_write)) {
2027 l = memory_access_size(mr, l, addr1);
2028 /* XXX: could force current_cpu to NULL to avoid
2029 potential bugs */
2030 switch (l) {
2031 case 8:
2032 /* 64 bit write access */
2033 val = ldq_p(buf);
2034 error |= io_mem_write(mr, addr1, val, 8);
2035 break;
2036 case 4:
2037 /* 32 bit write access */
2038 val = ldl_p(buf);
2039 error |= io_mem_write(mr, addr1, val, 4);
2040 break;
2041 case 2:
2042 /* 16 bit write access */
2043 val = lduw_p(buf);
2044 error |= io_mem_write(mr, addr1, val, 2);
2045 break;
2046 case 1:
2047 /* 8 bit write access */
2048 val = ldub_p(buf);
2049 error |= io_mem_write(mr, addr1, val, 1);
2050 break;
2051 default:
2052 abort();
2053 }
2054 } else {
2055 addr1 += memory_region_get_ram_addr(mr);
2056 /* RAM case */
2057 ptr = qemu_get_ram_ptr(addr1);
2058 memcpy(ptr, buf, l);
2059 invalidate_and_set_dirty(addr1, l);
2060 }
2061 } else {
2062 if (!memory_access_is_direct(mr, is_write)) {
2063 /* I/O case */
2064 l = memory_access_size(mr, l, addr1);
2065 switch (l) {
2066 case 8:
2067 /* 64 bit read access */
2068 error |= io_mem_read(mr, addr1, &val, 8);
2069 stq_p(buf, val);
2070 break;
2071 case 4:
2072 /* 32 bit read access */
2073 error |= io_mem_read(mr, addr1, &val, 4);
2074 stl_p(buf, val);
2075 break;
2076 case 2:
2077 /* 16 bit read access */
2078 error |= io_mem_read(mr, addr1, &val, 2);
2079 stw_p(buf, val);
2080 break;
2081 case 1:
2082 /* 8 bit read access */
2083 error |= io_mem_read(mr, addr1, &val, 1);
2084 stb_p(buf, val);
2085 break;
2086 default:
2087 abort();
2088 }
2089 } else {
2090 /* RAM case */
2091 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2092 memcpy(buf, ptr, l);
2093 }
2094 }
2095 len -= l;
2096 buf += l;
2097 addr += l;
2098 }
2099
2100 return error;
2101 }
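
/*
 * Usage sketch (hypothetical device code; desc_addr is an assumed guest
 * physical address): draining a guest buffer through the system address
 * space.  address_space_rw() returns true if any component access failed.
 *
 *     uint8_t chunk[512];
 *
 *     if (address_space_rw(&address_space_memory, desc_addr,
 *                          chunk, sizeof(chunk), false)) {
 *         ...at least one sub-access hit an unassigned or failing region...
 *     }
 */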
2102
2103 bool address_space_write(AddressSpace *as, hwaddr addr,
2104 const uint8_t *buf, int len)
2105 {
2106 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2107 }
2108
2109 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2110 {
2111 return address_space_rw(as, addr, buf, len, false);
2112 }
2113
2114
2115 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2116 int len, int is_write)
2117 {
2118 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2119 }
2120
2121 enum write_rom_type {
2122 WRITE_DATA,
2123 FLUSH_CACHE,
2124 };
2125
2126 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2127 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2128 {
2129 hwaddr l;
2130 uint8_t *ptr;
2131 hwaddr addr1;
2132 MemoryRegion *mr;
2133
2134 while (len > 0) {
2135 l = len;
2136 mr = address_space_translate(as, addr, &addr1, &l, true);
2137
2138 if (!(memory_region_is_ram(mr) ||
2139 memory_region_is_romd(mr))) {
2140 /* do nothing */
2141 } else {
2142 addr1 += memory_region_get_ram_addr(mr);
2143 /* ROM/RAM case */
2144 ptr = qemu_get_ram_ptr(addr1);
2145 switch (type) {
2146 case WRITE_DATA:
2147 memcpy(ptr, buf, l);
2148 invalidate_and_set_dirty(addr1, l);
2149 break;
2150 case FLUSH_CACHE:
2151 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2152 break;
2153 }
2154 }
2155 len -= l;
2156 buf += l;
2157 addr += l;
2158 }
2159 }
2160
2161 /* used for ROM loading: can write in RAM and ROM */
2162 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2163 const uint8_t *buf, int len)
2164 {
2165 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2166 }
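
/*
 * Sketch of the intended call pattern (rom_base/image are illustrative):
 * ROM loaders go through this helper because the region is read-only from
 * the guest's point of view, so a plain address_space_write() would not
 * update the backing RAM.
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, rom_base,
 *                                   image, image_size);
 */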
2167
2168 void cpu_flush_icache_range(hwaddr start, int len)
2169 {
2170 /*
2171 * This function should do the same thing as an icache flush that was
2172 * triggered from within the guest. For TCG we are always cache coherent,
2173 * so there is no need to flush anything. For KVM / Xen we need to flush
2174 * the host's instruction cache at least.
2175 */
2176 if (tcg_enabled()) {
2177 return;
2178 }
2179
2180 cpu_physical_memory_write_rom_internal(&address_space_memory,
2181 start, NULL, len, FLUSH_CACHE);
2182 }
2183
2184 typedef struct {
2185 MemoryRegion *mr;
2186 void *buffer;
2187 hwaddr addr;
2188 hwaddr len;
2189 } BounceBuffer;
2190
2191 static BounceBuffer bounce;
2192
2193 typedef struct MapClient {
2194 void *opaque;
2195 void (*callback)(void *opaque);
2196 QLIST_ENTRY(MapClient) link;
2197 } MapClient;
2198
2199 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2200 = QLIST_HEAD_INITIALIZER(map_client_list);
2201
2202 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2203 {
2204 MapClient *client = g_malloc(sizeof(*client));
2205
2206 client->opaque = opaque;
2207 client->callback = callback;
2208 QLIST_INSERT_HEAD(&map_client_list, client, link);
2209 return client;
2210 }
2211
2212 static void cpu_unregister_map_client(void *_client)
2213 {
2214 MapClient *client = (MapClient *)_client;
2215
2216 QLIST_REMOVE(client, link);
2217 g_free(client);
2218 }
2219
2220 static void cpu_notify_map_clients(void)
2221 {
2222 MapClient *client;
2223
2224 while (!QLIST_EMPTY(&map_client_list)) {
2225 client = QLIST_FIRST(&map_client_list);
2226 client->callback(client->opaque);
2227 cpu_unregister_map_client(client);
2228 }
2229 }
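
/*
 * Sketch of the retry protocol built on these map clients (callback and
 * state names are hypothetical): a caller whose address_space_map() attempt
 * failed because the single bounce buffer was busy registers itself here
 * and retries once the buffer owner calls address_space_unmap(), which ends
 * in cpu_notify_map_clients().
 *
 *     static void my_map_retry(void *opaque)
 *     {
 *         ...call address_space_map() again with the saved request...
 *     }
 *
 *     cpu_register_map_client(saved_request, my_map_retry);
 */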
2230
2231 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2232 {
2233 MemoryRegion *mr;
2234 hwaddr l, xlat;
2235
2236 while (len > 0) {
2237 l = len;
2238 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2239 if (!memory_access_is_direct(mr, is_write)) {
2240 l = memory_access_size(mr, l, addr);
2241 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2242 return false;
2243 }
2244 }
2245
2246 len -= l;
2247 addr += l;
2248 }
2249 return true;
2250 }
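
/*
 * Usage sketch (window parameters are illustrative): check that a
 * guest-supplied range is fully accessible before starting a transfer, so
 * a device model can reject it up front instead of partially completing it.
 *
 *     if (!address_space_access_valid(&address_space_memory, win_base,
 *                                     win_len, true)) {
 *         ...fail the request before touching guest memory...
 *     }
 */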
2251
2252 /* Map a physical memory region into a host virtual address.
2253 * May map a subset of the requested range, given by and returned in *plen.
2254 * May return NULL if resources needed to perform the mapping are exhausted.
2255 * Use only for reads OR writes - not for read-modify-write operations.
2256 * Use cpu_register_map_client() to know when retrying the map operation is
2257 * likely to succeed.
2258 */
2259 void *address_space_map(AddressSpace *as,
2260 hwaddr addr,
2261 hwaddr *plen,
2262 bool is_write)
2263 {
2264 hwaddr len = *plen;
2265 hwaddr done = 0;
2266 hwaddr l, xlat, base;
2267 MemoryRegion *mr, *this_mr;
2268 ram_addr_t raddr;
2269
2270 if (len == 0) {
2271 return NULL;
2272 }
2273
2274 l = len;
2275 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2276 if (!memory_access_is_direct(mr, is_write)) {
2277 if (bounce.buffer) {
2278 return NULL;
2279 }
2280 /* Avoid unbounded allocations */
2281 l = MIN(l, TARGET_PAGE_SIZE);
2282 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2283 bounce.addr = addr;
2284 bounce.len = l;
2285
2286 memory_region_ref(mr);
2287 bounce.mr = mr;
2288 if (!is_write) {
2289 address_space_read(as, addr, bounce.buffer, l);
2290 }
2291
2292 *plen = l;
2293 return bounce.buffer;
2294 }
2295
2296 base = xlat;
2297 raddr = memory_region_get_ram_addr(mr);
2298
2299 for (;;) {
2300 len -= l;
2301 addr += l;
2302 done += l;
2303 if (len == 0) {
2304 break;
2305 }
2306
2307 l = len;
2308 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2309 if (this_mr != mr || xlat != base + done) {
2310 break;
2311 }
2312 }
2313
2314 memory_region_ref(mr);
2315 *plen = done;
2316 return qemu_ram_ptr_length(raddr + base, plen);
2317 }
2318
2319 /* Unmaps a memory region previously mapped by address_space_map().
2320 * Will also mark the memory as dirty if is_write == 1. access_len gives
2321 * the amount of memory that was actually read or written by the caller.
2322 */
2323 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2324 int is_write, hwaddr access_len)
2325 {
2326 if (buffer != bounce.buffer) {
2327 MemoryRegion *mr;
2328 ram_addr_t addr1;
2329
2330 mr = qemu_ram_addr_from_host(buffer, &addr1);
2331 assert(mr != NULL);
2332 if (is_write) {
2333 while (access_len) {
2334 unsigned l;
2335 l = TARGET_PAGE_SIZE;
2336 if (l > access_len)
2337 l = access_len;
2338 invalidate_and_set_dirty(addr1, l);
2339 addr1 += l;
2340 access_len -= l;
2341 }
2342 }
2343 if (xen_enabled()) {
2344 xen_invalidate_map_cache_entry(buffer);
2345 }
2346 memory_region_unref(mr);
2347 return;
2348 }
2349 if (is_write) {
2350 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2351 }
2352 qemu_vfree(bounce.buffer);
2353 bounce.buffer = NULL;
2354 memory_region_unref(bounce.mr);
2355 cpu_notify_map_clients();
2356 }
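
/*
 * Sketch of the map/unmap pairing described above address_space_map()
 * (guest_addr/size are illustrative).  Note that the returned length may be
 * smaller than requested, and that access_len passed to the unmap tells the
 * core how much memory to mark dirty.
 *
 *     hwaddr len = size;
 *     void *p = address_space_map(&address_space_memory, guest_addr,
 *                                 &len, true);
 *     if (p) {
 *         ...fill at most len bytes at p...
 *         address_space_unmap(&address_space_memory, p, len, true, len);
 *     } else {
 *         ...fall back to address_space_rw() or register a map client...
 *     }
 */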
2357
2358 void *cpu_physical_memory_map(hwaddr addr,
2359 hwaddr *plen,
2360 int is_write)
2361 {
2362 return address_space_map(&address_space_memory, addr, plen, is_write);
2363 }
2364
2365 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2366 int is_write, hwaddr access_len)
2367 {
2368 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2369 }
2370
2371 /* warning: addr must be aligned */
2372 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2373 enum device_endian endian)
2374 {
2375 uint8_t *ptr;
2376 uint64_t val;
2377 MemoryRegion *mr;
2378 hwaddr l = 4;
2379 hwaddr addr1;
2380
2381 mr = address_space_translate(as, addr, &addr1, &l, false);
2382 if (l < 4 || !memory_access_is_direct(mr, false)) {
2383 /* I/O case */
2384 io_mem_read(mr, addr1, &val, 4);
2385 #if defined(TARGET_WORDS_BIGENDIAN)
2386 if (endian == DEVICE_LITTLE_ENDIAN) {
2387 val = bswap32(val);
2388 }
2389 #else
2390 if (endian == DEVICE_BIG_ENDIAN) {
2391 val = bswap32(val);
2392 }
2393 #endif
2394 } else {
2395 /* RAM case */
2396 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2397 & TARGET_PAGE_MASK)
2398 + addr1);
2399 switch (endian) {
2400 case DEVICE_LITTLE_ENDIAN:
2401 val = ldl_le_p(ptr);
2402 break;
2403 case DEVICE_BIG_ENDIAN:
2404 val = ldl_be_p(ptr);
2405 break;
2406 default:
2407 val = ldl_p(ptr);
2408 break;
2409 }
2410 }
2411 return val;
2412 }
2413
2414 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2415 {
2416 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2417 }
2418
2419 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2420 {
2421 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2422 }
2423
2424 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2425 {
2426 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2427 }
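
/*
 * Illustrative use of the fixed-endian load helpers (desc_base is an
 * assumed guest physical address): read a 32-bit little-endian field of an
 * in-memory descriptor regardless of the target's native byte order.
 *
 *     uint32_t next = ldl_le_phys(&address_space_memory, desc_base + 4);
 */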
2428
2429 /* warning: addr must be aligned */
2430 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2431 enum device_endian endian)
2432 {
2433 uint8_t *ptr;
2434 uint64_t val;
2435 MemoryRegion *mr;
2436 hwaddr l = 8;
2437 hwaddr addr1;
2438
2439 mr = address_space_translate(as, addr, &addr1, &l,
2440 false);
2441 if (l < 8 || !memory_access_is_direct(mr, false)) {
2442 /* I/O case */
2443 io_mem_read(mr, addr1, &val, 8);
2444 #if defined(TARGET_WORDS_BIGENDIAN)
2445 if (endian == DEVICE_LITTLE_ENDIAN) {
2446 val = bswap64(val);
2447 }
2448 #else
2449 if (endian == DEVICE_BIG_ENDIAN) {
2450 val = bswap64(val);
2451 }
2452 #endif
2453 } else {
2454 /* RAM case */
2455 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2456 & TARGET_PAGE_MASK)
2457 + addr1);
2458 switch (endian) {
2459 case DEVICE_LITTLE_ENDIAN:
2460 val = ldq_le_p(ptr);
2461 break;
2462 case DEVICE_BIG_ENDIAN:
2463 val = ldq_be_p(ptr);
2464 break;
2465 default:
2466 val = ldq_p(ptr);
2467 break;
2468 }
2469 }
2470 return val;
2471 }
2472
2473 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2474 {
2475 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2476 }
2477
2478 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2479 {
2480 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2481 }
2482
2483 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2484 {
2485 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2486 }
2487
2488 /* XXX: optimize */
2489 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2490 {
2491 uint8_t val;
2492 address_space_rw(as, addr, &val, 1, 0);
2493 return val;
2494 }
2495
2496 /* warning: addr must be aligned */
2497 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2498 enum device_endian endian)
2499 {
2500 uint8_t *ptr;
2501 uint64_t val;
2502 MemoryRegion *mr;
2503 hwaddr l = 2;
2504 hwaddr addr1;
2505
2506 mr = address_space_translate(as, addr, &addr1, &l,
2507 false);
2508 if (l < 2 || !memory_access_is_direct(mr, false)) {
2509 /* I/O case */
2510 io_mem_read(mr, addr1, &val, 2);
2511 #if defined(TARGET_WORDS_BIGENDIAN)
2512 if (endian == DEVICE_LITTLE_ENDIAN) {
2513 val = bswap16(val);
2514 }
2515 #else
2516 if (endian == DEVICE_BIG_ENDIAN) {
2517 val = bswap16(val);
2518 }
2519 #endif
2520 } else {
2521 /* RAM case */
2522 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2523 & TARGET_PAGE_MASK)
2524 + addr1);
2525 switch (endian) {
2526 case DEVICE_LITTLE_ENDIAN:
2527 val = lduw_le_p(ptr);
2528 break;
2529 case DEVICE_BIG_ENDIAN:
2530 val = lduw_be_p(ptr);
2531 break;
2532 default:
2533 val = lduw_p(ptr);
2534 break;
2535 }
2536 }
2537 return val;
2538 }
2539
2540 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2541 {
2542 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2543 }
2544
2545 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2546 {
2547 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2548 }
2549
2550 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2551 {
2552 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2553 }
2554
2555 /* warning: addr must be aligned. The RAM page is not marked as dirty
2556 and the code inside is not invalidated. It is useful if the dirty
2557 bits are used to track modified PTEs */
2558 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2559 {
2560 uint8_t *ptr;
2561 MemoryRegion *mr;
2562 hwaddr l = 4;
2563 hwaddr addr1;
2564
2565 mr = address_space_translate(as, addr, &addr1, &l,
2566 true);
2567 if (l < 4 || !memory_access_is_direct(mr, true)) {
2568 io_mem_write(mr, addr1, val, 4);
2569 } else {
2570 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2571 ptr = qemu_get_ram_ptr(addr1);
2572 stl_p(ptr, val);
2573
2574 if (unlikely(in_migration)) {
2575 if (cpu_physical_memory_is_clean(addr1)) {
2576 /* invalidate code */
2577 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2578 /* set dirty bit */
2579 cpu_physical_memory_set_dirty_flag(addr1,
2580 DIRTY_MEMORY_MIGRATION);
2581 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2582 }
2583 }
2584 }
2585 }
2586
2587 /* warning: addr must be aligned */
2588 static inline void stl_phys_internal(AddressSpace *as,
2589 hwaddr addr, uint32_t val,
2590 enum device_endian endian)
2591 {
2592 uint8_t *ptr;
2593 MemoryRegion *mr;
2594 hwaddr l = 4;
2595 hwaddr addr1;
2596
2597 mr = address_space_translate(as, addr, &addr1, &l,
2598 true);
2599 if (l < 4 || !memory_access_is_direct(mr, true)) {
2600 #if defined(TARGET_WORDS_BIGENDIAN)
2601 if (endian == DEVICE_LITTLE_ENDIAN) {
2602 val = bswap32(val);
2603 }
2604 #else
2605 if (endian == DEVICE_BIG_ENDIAN) {
2606 val = bswap32(val);
2607 }
2608 #endif
2609 io_mem_write(mr, addr1, val, 4);
2610 } else {
2611 /* RAM case */
2612 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2613 ptr = qemu_get_ram_ptr(addr1);
2614 switch (endian) {
2615 case DEVICE_LITTLE_ENDIAN:
2616 stl_le_p(ptr, val);
2617 break;
2618 case DEVICE_BIG_ENDIAN:
2619 stl_be_p(ptr, val);
2620 break;
2621 default:
2622 stl_p(ptr, val);
2623 break;
2624 }
2625 invalidate_and_set_dirty(addr1, 4);
2626 }
2627 }
2628
2629 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2630 {
2631 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2632 }
2633
2634 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2635 {
2636 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2637 }
2638
2639 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2640 {
2641 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2642 }
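
/*
 * Matching store-side sketch (desc_base is again an assumed address):
 * publish a status word back to the guest in little-endian layout.
 *
 *     stl_le_phys(&address_space_memory, desc_base + 8, 1);
 */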
2643
2644 /* XXX: optimize */
2645 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2646 {
2647 uint8_t v = val;
2648 address_space_rw(as, addr, &v, 1, 1);
2649 }
2650
2651 /* warning: addr must be aligned */
2652 static inline void stw_phys_internal(AddressSpace *as,
2653 hwaddr addr, uint32_t val,
2654 enum device_endian endian)
2655 {
2656 uint8_t *ptr;
2657 MemoryRegion *mr;
2658 hwaddr l = 2;
2659 hwaddr addr1;
2660
2661 mr = address_space_translate(as, addr, &addr1, &l, true);
2662 if (l < 2 || !memory_access_is_direct(mr, true)) {
2663 #if defined(TARGET_WORDS_BIGENDIAN)
2664 if (endian == DEVICE_LITTLE_ENDIAN) {
2665 val = bswap16(val);
2666 }
2667 #else
2668 if (endian == DEVICE_BIG_ENDIAN) {
2669 val = bswap16(val);
2670 }
2671 #endif
2672 io_mem_write(mr, addr1, val, 2);
2673 } else {
2674 /* RAM case */
2675 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2676 ptr = qemu_get_ram_ptr(addr1);
2677 switch (endian) {
2678 case DEVICE_LITTLE_ENDIAN:
2679 stw_le_p(ptr, val);
2680 break;
2681 case DEVICE_BIG_ENDIAN:
2682 stw_be_p(ptr, val);
2683 break;
2684 default:
2685 stw_p(ptr, val);
2686 break;
2687 }
2688 invalidate_and_set_dirty(addr1, 2);
2689 }
2690 }
2691
2692 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2693 {
2694 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2695 }
2696
2697 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2698 {
2699 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2700 }
2701
2702 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2703 {
2704 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2705 }
2706
2707 /* XXX: optimize */
2708 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2709 {
2710 val = tswap64(val);
2711 address_space_rw(as, addr, (void *) &val, 8, 1);
2712 }
2713
2714 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2715 {
2716 val = cpu_to_le64(val);
2717 address_space_rw(as, addr, (void *) &val, 8, 1);
2718 }
2719
2720 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2721 {
2722 val = cpu_to_be64(val);
2723 address_space_rw(as, addr, (void *) &val, 8, 1);
2724 }
2725
2726 /* virtual memory access for debug (includes writing to ROM) */
2727 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2728 uint8_t *buf, int len, int is_write)
2729 {
2730 int l;
2731 hwaddr phys_addr;
2732 target_ulong page;
2733
2734 while (len > 0) {
2735 page = addr & TARGET_PAGE_MASK;
2736 phys_addr = cpu_get_phys_page_debug(cpu, page);
2737 /* if no physical page mapped, return an error */
2738 if (phys_addr == -1)
2739 return -1;
2740 l = (page + TARGET_PAGE_SIZE) - addr;
2741 if (l > len)
2742 l = len;
2743 phys_addr += (addr & ~TARGET_PAGE_MASK);
2744 if (is_write) {
2745 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2746 } else {
2747 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2748 }
2749 len -= l;
2750 buf += l;
2751 addr += l;
2752 }
2753 return 0;
2754 }
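
/*
 * Sketch of a debugger-style access (cpu and vaddr are whatever the caller
 * already has in hand): this is the path the gdbstub uses to read guest
 * virtual memory, and it returns -1 when no physical page is mapped.
 *
 *     uint8_t word[4];
 *     if (cpu_memory_rw_debug(cpu, vaddr, word, sizeof(word), 0) < 0) {
 *         ...report the address as unreadable...
 *     }
 */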
2755 #endif
2756
2757 #if !defined(CONFIG_USER_ONLY)
2758
2759 /*
2760 * A helper function for the _utterly broken_ virtio device model to find out if
2761 * it's running on a big endian machine. Don't do this at home kids!
2762 */
2763 bool virtio_is_big_endian(void);
2764 bool virtio_is_big_endian(void)
2765 {
2766 #if defined(TARGET_WORDS_BIGENDIAN)
2767 return true;
2768 #else
2769 return false;
2770 #endif
2771 }
2772
2773 #endif
2774
2775 #ifndef CONFIG_USER_ONLY
2776 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2777 {
2778 MemoryRegion *mr;
2779 hwaddr l = 1;
2780
2781 mr = address_space_translate(&address_space_memory,
2782 phys_addr, &phys_addr, &l, false);
2783
2784 return !(memory_region_is_ram(mr) ||
2785 memory_region_is_romd(mr));
2786 }
2787
2788 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2789 {
2790 RAMBlock *block;
2791
2792 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2793 func(block->host, block->offset, block->length, opaque);
2794 }
2795 }
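
/*
 * Sketch of an iterator callback (the exact RAMBlockIterFunc typedef is
 * assumed to match the call above, i.e. func(host, offset, length, opaque)):
 * total up the length of all RAM blocks.
 *
 *     static void add_block_length(void *host, ram_addr_t offset,
 *                                  ram_addr_t length, void *opaque)
 *     {
 *         *(ram_addr_t *)opaque += length;
 *     }
 *
 *     ram_addr_t total = 0;
 *     qemu_ram_foreach_block(add_block_length, &total);
 */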
2796 #endif