exec: Change cpu_breakpoint_{insert,remove{,_by_ref,_all}} argument
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 #endif
74
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
77 cpu_exec() */
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
typedef struct PhysPageEntry PhysPageEntry;

/*
 * One slot of the physical page map.  The two bit-fields share a single
 * 32-bit word: 6 bits of skip count and 26 bits of index.
 */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
94
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
99
100 #define P_L2_BITS 9
101 #define P_L2_SIZE (1 << P_L2_BITS)
102
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104
105 typedef PhysPageEntry Node[P_L2_SIZE];
106
/*
 * Backing storage for a physical page map: a growable array of nodes and
 * a growable array of sections.
 */
typedef struct PhysPageMap {
    /* Number of entries of 'sections' currently in use. */
    unsigned sections_nb;
    /* Number of entries 'sections' has room for. */
    unsigned sections_nb_alloc;
    /* Number of entries of 'nodes' currently in use. */
    unsigned nodes_nb;
    /* Number of entries 'nodes' has room for. */
    unsigned nodes_nb_alloc;
    /* Node array, indexed by PhysPageEntry.ptr of non-leaf entries. */
    Node *nodes;
    /* Section array, indexed by PhysPageEntry.ptr of leaf entries. */
    MemoryRegionSection *sections;
} PhysPageMap;
115
/* Lookup state used to resolve a physical address to a MemoryRegionSection. */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;   /* root entry handed to phys_page_find() */
    PhysPageMap map;          /* nodes and sections the root entry refers to */
    AddressSpace *as;         /* address space passed to subpage_init() */
};
124
/* Offset of @addr inside its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page that is split between several sections.  'iomem' is the region
 * installed in the page map; container_of() is used on it to get back to
 * the subpage_t.
 */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    /* Per-byte section index, looked up with SUBPAGE_IDX(addr). */
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
132
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
137
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
141
142 static MemoryRegion io_mem_watch;
143 #endif
144
145 #if !defined(CONFIG_USER_ONLY)
146
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 {
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154 }
155
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
157 {
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169 }
170
/*
 * Recursive worker for phys_page_set().
 *
 * @map:   storage that new nodes are allocated from
 * @lp:    entry at the current level whose child node is filled in
 * @index: in/out cursor: page index of the next page to map
 * @nb:    in/out cursor: number of pages still to map
 * @leaf:  section index stored in every entry that gets mapped
 * @level: current level; one entry at this level spans
 *         1 << (level * P_L2_BITS) pages
 *
 * *index and *nb are advanced as pages are mapped, so on return the caller
 * sees how far the mapping got.
 */
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    /* Number of pages covered by a single entry at this level. */
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        /* No child node yet: allocate one. */
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            /* Bottom-level nodes start out as leaves of the unassigned
             * section instead of the "no child" default. */
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    /* From here on lp walks the slots of the child node, starting at the
     * slot selected by this level's bits of *index. */
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* The remaining range covers this whole slot and is aligned
             * to it: make the slot itself a leaf. */
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            /* Partial coverage: descend one level. */
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
205
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209 {
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214 }
215
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 *
 * @lp:        entry to compact (updated in place)
 * @nodes:     node array that lp->ptr indexes
 * @compacted: passed down the recursion; not otherwise read or written here
 *
 * Children that are themselves non-leaf entries are compacted first.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    /* Index of the last populated child seen; P_L2_SIZE means "none". */
    unsigned valid_ptr = P_L2_SIZE;
    /* Number of populated children. */
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    /* NOTE(review): the skip bit-field is 6 bits wide but the limit used
     * here is 1 << 3; this is more conservative than the field requires.
     * Confirm whether that is intended before changing it. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
268
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 {
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276 }
277
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280 {
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300 }
301
302 bool memory_region_is_unassigned(MemoryRegion *mr)
303 {
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306 }
307
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311 {
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321 }
322
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 Int128 diff;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
339 return section;
340 }
341
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 {
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
346 }
347 if (memory_region_is_romd(mr)) {
348 return !is_write;
349 }
350
351 return false;
352 }
353
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
356 bool is_write)
357 {
358 IOMMUTLBEntry iotlb;
359 MemoryRegionSection *section;
360 MemoryRegion *mr;
361 hwaddr len = *plen;
362
363 for (;;) {
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
365 mr = section->mr;
366
367 if (!mr->iommu_ops) {
368 break;
369 }
370
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
377 break;
378 }
379
380 as = iotlb.target_as;
381 }
382
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
386 }
387
388 *plen = len;
389 *xlat = addr;
390 return mr;
391 }
392
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
395 hwaddr *plen)
396 {
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399
400 assert(!section->mr->iommu_ops);
401 return section;
402 }
403 #endif
404
405 void cpu_exec_init_all(void)
406 {
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
409 memory_map_init();
410 io_mem_init();
411 #endif
412 }
413
414 #if !defined(CONFIG_USER_ONLY)
415
416 static int cpu_common_post_load(void *opaque, int version_id)
417 {
418 CPUState *cpu = opaque;
419
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
424
425 return 0;
426 }
427
/*
 * Migration description of the CPUState fields 'halted' and
 * 'interrupt_request'; cpu_common_post_load() runs after they are loaded.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
440
441 #endif
442
443 CPUState *qemu_get_cpu(int index)
444 {
445 CPUState *cpu;
446
447 CPU_FOREACH(cpu) {
448 if (cpu->cpu_index == index) {
449 return cpu;
450 }
451 }
452
453 return NULL;
454 }
455
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 {
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
461
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
464 } else {
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 }
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
469 }
470 #endif
471
472 void cpu_exec_init(CPUArchState *env)
473 {
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
476 CPUState *some_cpu;
477 int cpu_index;
478
479 #if defined(CONFIG_USER_ONLY)
480 cpu_list_lock();
481 #endif
482 cpu_index = 0;
483 CPU_FOREACH(some_cpu) {
484 cpu_index++;
485 }
486 cpu->cpu_index = cpu_index;
487 cpu->numa_node = 0;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
493 #endif
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
496 cpu_list_unlock();
497 #endif
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 }
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 #endif
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
509 }
510 }
511
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
515 {
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
517 }
518 #else
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 {
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 if (phys != -1) {
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
525 }
526 }
527 #endif
528 #endif /* TARGET_HAS_ICE */
529
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
532
533 {
534 }
535
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
538 {
539 return -ENOSYS;
540 }
541 #else
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
545 {
546 CPUArchState *env = cpu->env_ptr;
547 vaddr len_mask = ~(len - 1);
548 CPUWatchpoint *wp;
549
550 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
551 if ((len & (len - 1)) || (addr & ~len_mask) ||
552 len == 0 || len > TARGET_PAGE_SIZE) {
553 error_report("tried to set invalid watchpoint at %"
554 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
555 return -EINVAL;
556 }
557 wp = g_malloc(sizeof(*wp));
558
559 wp->vaddr = addr;
560 wp->len_mask = len_mask;
561 wp->flags = flags;
562
563 /* keep all GDB-injected watchpoints in front */
564 if (flags & BP_GDB) {
565 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
566 } else {
567 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
568 }
569
570 tlb_flush_page(env, addr);
571
572 if (watchpoint)
573 *watchpoint = wp;
574 return 0;
575 }
576
577 /* Remove a specific watchpoint. */
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
579 int flags)
580 {
581 vaddr len_mask = ~(len - 1);
582 CPUWatchpoint *wp;
583
584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
585 if (addr == wp->vaddr && len_mask == wp->len_mask
586 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
587 cpu_watchpoint_remove_by_ref(cpu, wp);
588 return 0;
589 }
590 }
591 return -ENOENT;
592 }
593
594 /* Remove a specific watchpoint by reference. */
595 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
596 {
597 CPUArchState *env = cpu->env_ptr;
598
599 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
600
601 tlb_flush_page(env, watchpoint->vaddr);
602
603 g_free(watchpoint);
604 }
605
606 /* Remove all matching watchpoints. */
607 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
608 {
609 CPUWatchpoint *wp, *next;
610
611 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
612 if (wp->flags & mask) {
613 cpu_watchpoint_remove_by_ref(cpu, wp);
614 }
615 }
616 }
617 #endif
618
619 /* Add a breakpoint. */
620 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
621 CPUBreakpoint **breakpoint)
622 {
623 #if defined(TARGET_HAS_ICE)
624 CPUBreakpoint *bp;
625
626 bp = g_malloc(sizeof(*bp));
627
628 bp->pc = pc;
629 bp->flags = flags;
630
631 /* keep all GDB-injected breakpoints in front */
632 if (flags & BP_GDB) {
633 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
634 } else {
635 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
636 }
637
638 breakpoint_invalidate(cpu, pc);
639
640 if (breakpoint) {
641 *breakpoint = bp;
642 }
643 return 0;
644 #else
645 return -ENOSYS;
646 #endif
647 }
648
649 /* Remove a specific breakpoint. */
650 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
651 {
652 #if defined(TARGET_HAS_ICE)
653 CPUBreakpoint *bp;
654
655 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
656 if (bp->pc == pc && bp->flags == flags) {
657 cpu_breakpoint_remove_by_ref(cpu, bp);
658 return 0;
659 }
660 }
661 return -ENOENT;
662 #else
663 return -ENOSYS;
664 #endif
665 }
666
667 /* Remove a specific breakpoint by reference. */
668 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
669 {
670 #if defined(TARGET_HAS_ICE)
671 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
672
673 breakpoint_invalidate(cpu, breakpoint->pc);
674
675 g_free(breakpoint);
676 #endif
677 }
678
679 /* Remove all matching breakpoints. */
680 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
681 {
682 #if defined(TARGET_HAS_ICE)
683 CPUBreakpoint *bp, *next;
684
685 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
686 if (bp->flags & mask) {
687 cpu_breakpoint_remove_by_ref(cpu, bp);
688 }
689 }
690 #endif
691 }
692
693 /* enable or disable single step mode. EXCP_DEBUG is returned by the
694 CPU loop after each instruction */
695 void cpu_single_step(CPUState *cpu, int enabled)
696 {
697 #if defined(TARGET_HAS_ICE)
698 if (cpu->singlestep_enabled != enabled) {
699 cpu->singlestep_enabled = enabled;
700 if (kvm_enabled()) {
701 kvm_update_guest_debug(cpu, 0);
702 } else {
703 /* must flush all the translated code to avoid inconsistencies */
704 /* XXX: only flush what is necessary */
705 CPUArchState *env = cpu->env_ptr;
706 tb_flush(env);
707 }
708 }
709 #endif
710 }
711
712 void cpu_abort(CPUArchState *env, const char *fmt, ...)
713 {
714 CPUState *cpu = ENV_GET_CPU(env);
715 va_list ap;
716 va_list ap2;
717
718 va_start(ap, fmt);
719 va_copy(ap2, ap);
720 fprintf(stderr, "qemu: fatal: ");
721 vfprintf(stderr, fmt, ap);
722 fprintf(stderr, "\n");
723 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
724 if (qemu_log_enabled()) {
725 qemu_log("qemu: fatal: ");
726 qemu_log_vprintf(fmt, ap2);
727 qemu_log("\n");
728 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
729 qemu_log_flush();
730 qemu_log_close();
731 }
732 va_end(ap2);
733 va_end(ap);
734 #if defined(CONFIG_USER_ONLY)
735 {
736 struct sigaction act;
737 sigfillset(&act.sa_mask);
738 act.sa_handler = SIG_DFL;
739 sigaction(SIGABRT, &act, NULL);
740 }
741 #endif
742 abort();
743 }
744
745 #if !defined(CONFIG_USER_ONLY)
746 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
747 {
748 RAMBlock *block;
749
750 /* The list is protected by the iothread lock here. */
751 block = ram_list.mru_block;
752 if (block && addr - block->offset < block->length) {
753 goto found;
754 }
755 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
756 if (addr - block->offset < block->length) {
757 goto found;
758 }
759 }
760
761 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
762 abort();
763
764 found:
765 ram_list.mru_block = block;
766 return block;
767 }
768
769 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
770 {
771 ram_addr_t start1;
772 RAMBlock *block;
773 ram_addr_t end;
774
775 end = TARGET_PAGE_ALIGN(start + length);
776 start &= TARGET_PAGE_MASK;
777
778 block = qemu_get_ram_block(start);
779 assert(block == qemu_get_ram_block(end - 1));
780 start1 = (uintptr_t)block->host + (start - block->offset);
781 cpu_tlb_reset_dirty_all(start1, length);
782 }
783
784 /* Note: start and end must be within the same ram block. */
785 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
786 unsigned client)
787 {
788 if (length == 0)
789 return;
790 cpu_physical_memory_clear_dirty_range(start, length, client);
791
792 if (tcg_enabled()) {
793 tlb_reset_dirty_range_all(start, length);
794 }
795 }
796
797 static void cpu_physical_memory_set_dirty_tracking(bool enable)
798 {
799 in_migration = enable;
800 }
801
802 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
803 MemoryRegionSection *section,
804 target_ulong vaddr,
805 hwaddr paddr, hwaddr xlat,
806 int prot,
807 target_ulong *address)
808 {
809 CPUState *cpu = ENV_GET_CPU(env);
810 hwaddr iotlb;
811 CPUWatchpoint *wp;
812
813 if (memory_region_is_ram(section->mr)) {
814 /* Normal RAM. */
815 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
816 + xlat;
817 if (!section->readonly) {
818 iotlb |= PHYS_SECTION_NOTDIRTY;
819 } else {
820 iotlb |= PHYS_SECTION_ROM;
821 }
822 } else {
823 iotlb = section - section->address_space->dispatch->map.sections;
824 iotlb += xlat;
825 }
826
827 /* Make accesses to pages with watchpoints go via the
828 watchpoint trap routines. */
829 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
830 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
831 /* Avoid trapping reads of pages with a write breakpoint. */
832 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
833 iotlb = PHYS_SECTION_WATCH + paddr;
834 *address |= TLB_MMIO;
835 break;
836 }
837 }
838 }
839
840 return iotlb;
841 }
842 #endif /* !defined(CONFIG_USER_ONLY) */
843
844 #if !defined(CONFIG_USER_ONLY)
845
846 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
847 uint16_t section);
848 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
849
850 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
851
852 /*
853 * Set a custom physical guest memory alloator.
854 * Accelerators with unusual needs may need this. Hopefully, we can
855 * get rid of it eventually.
856 */
857 void phys_mem_set_alloc(void *(*alloc)(size_t))
858 {
859 phys_mem_alloc = alloc;
860 }
861
862 static uint16_t phys_section_add(PhysPageMap *map,
863 MemoryRegionSection *section)
864 {
865 /* The physical section number is ORed with a page-aligned
866 * pointer to produce the iotlb entries. Thus it should
867 * never overflow into the page-aligned value.
868 */
869 assert(map->sections_nb < TARGET_PAGE_SIZE);
870
871 if (map->sections_nb == map->sections_nb_alloc) {
872 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
873 map->sections = g_renew(MemoryRegionSection, map->sections,
874 map->sections_nb_alloc);
875 }
876 map->sections[map->sections_nb] = *section;
877 memory_region_ref(section->mr);
878 return map->sections_nb++;
879 }
880
881 static void phys_section_destroy(MemoryRegion *mr)
882 {
883 memory_region_unref(mr);
884
885 if (mr->subpage) {
886 subpage_t *subpage = container_of(mr, subpage_t, iomem);
887 memory_region_destroy(&subpage->iomem);
888 g_free(subpage);
889 }
890 }
891
892 static void phys_sections_free(PhysPageMap *map)
893 {
894 while (map->sections_nb > 0) {
895 MemoryRegionSection *section = &map->sections[--map->sections_nb];
896 phys_section_destroy(section->mr);
897 }
898 g_free(map->sections);
899 g_free(map->nodes);
900 }
901
902 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
903 {
904 subpage_t *subpage;
905 hwaddr base = section->offset_within_address_space
906 & TARGET_PAGE_MASK;
907 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
908 d->map.nodes, d->map.sections);
909 MemoryRegionSection subsection = {
910 .offset_within_address_space = base,
911 .size = int128_make64(TARGET_PAGE_SIZE),
912 };
913 hwaddr start, end;
914
915 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
916
917 if (!(existing->mr->subpage)) {
918 subpage = subpage_init(d->as, base);
919 subsection.address_space = d->as;
920 subsection.mr = &subpage->iomem;
921 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
922 phys_section_add(&d->map, &subsection));
923 } else {
924 subpage = container_of(existing->mr, subpage_t, iomem);
925 }
926 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
927 end = start + int128_get64(section->size) - 1;
928 subpage_register(subpage, start, end,
929 phys_section_add(&d->map, section));
930 }
931
932
933 static void register_multipage(AddressSpaceDispatch *d,
934 MemoryRegionSection *section)
935 {
936 hwaddr start_addr = section->offset_within_address_space;
937 uint16_t section_index = phys_section_add(&d->map, section);
938 uint64_t num_pages = int128_get64(int128_rshift(section->size,
939 TARGET_PAGE_BITS));
940
941 assert(num_pages);
942 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
943 }
944
/*
 * Memory listener callback: add @section to the next dispatch table of the
 * address space that owns @listener.
 *
 * The section is cut into pieces: an unaligned head up to the next page
 * boundary, a run of whole pages, and a tail shorter than a page.  Partial
 * pages go through register_subpage(), whole pages through
 * register_multipage().
 */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    /* 'now' is the piece being registered, 'remain' what is still left
     * (including 'now' until the top of the loop subtracts it). */
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        /* Unaligned start: register the head up to the page boundary. */
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        /* Aligned start: nothing consumed yet. */
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        /* Step past the piece that was just registered. */
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            /* Less than a page left: the tail. */
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            /* Still unaligned: take one page worth as a subpage. */
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            /* Aligned: take as many whole pages as remain. */
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
977
/* Flush KVM's coalesced MMIO buffer; a no-op when KVM is not in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
983
984 void qemu_mutex_lock_ramlist(void)
985 {
986 qemu_mutex_lock(&ram_list.mutex);
987 }
988
989 void qemu_mutex_unlock_ramlist(void)
990 {
991 qemu_mutex_unlock(&ram_list.mutex);
992 }
993
994 #ifdef __linux__
995
996 #include <sys/vfs.h>
997
998 #define HUGETLBFS_MAGIC 0x958458f6
999
1000 static long gethugepagesize(const char *path)
1001 {
1002 struct statfs fs;
1003 int ret;
1004
1005 do {
1006 ret = statfs(path, &fs);
1007 } while (ret != 0 && errno == EINTR);
1008
1009 if (ret != 0) {
1010 perror(path);
1011 return 0;
1012 }
1013
1014 if (fs.f_type != HUGETLBFS_MAGIC)
1015 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1016
1017 return fs.f_bsize;
1018 }
1019
1020 static sigjmp_buf sigjump;
1021
1022 static void sigbus_handler(int signal)
1023 {
1024 siglongjmp(sigjump, 1);
1025 }
1026
1027 static void *file_ram_alloc(RAMBlock *block,
1028 ram_addr_t memory,
1029 const char *path)
1030 {
1031 char *filename;
1032 char *sanitized_name;
1033 char *c;
1034 void *area;
1035 int fd;
1036 unsigned long hpagesize;
1037
1038 hpagesize = gethugepagesize(path);
1039 if (!hpagesize) {
1040 goto error;
1041 }
1042
1043 if (memory < hpagesize) {
1044 return NULL;
1045 }
1046
1047 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1048 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1049 goto error;
1050 }
1051
1052 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1053 sanitized_name = g_strdup(block->mr->name);
1054 for (c = sanitized_name; *c != '\0'; c++) {
1055 if (*c == '/')
1056 *c = '_';
1057 }
1058
1059 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1060 sanitized_name);
1061 g_free(sanitized_name);
1062
1063 fd = mkstemp(filename);
1064 if (fd < 0) {
1065 perror("unable to create backing store for hugepages");
1066 g_free(filename);
1067 goto error;
1068 }
1069 unlink(filename);
1070 g_free(filename);
1071
1072 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1073
1074 /*
1075 * ftruncate is not supported by hugetlbfs in older
1076 * hosts, so don't bother bailing out on errors.
1077 * If anything goes wrong with it under other filesystems,
1078 * mmap will fail.
1079 */
1080 if (ftruncate(fd, memory))
1081 perror("ftruncate");
1082
1083 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1084 if (area == MAP_FAILED) {
1085 perror("file_ram_alloc: can't mmap RAM pages");
1086 close(fd);
1087 goto error;
1088 }
1089
1090 if (mem_prealloc) {
1091 int ret, i;
1092 struct sigaction act, oldact;
1093 sigset_t set, oldset;
1094
1095 memset(&act, 0, sizeof(act));
1096 act.sa_handler = &sigbus_handler;
1097 act.sa_flags = 0;
1098
1099 ret = sigaction(SIGBUS, &act, &oldact);
1100 if (ret) {
1101 perror("file_ram_alloc: failed to install signal handler");
1102 exit(1);
1103 }
1104
1105 /* unblock SIGBUS */
1106 sigemptyset(&set);
1107 sigaddset(&set, SIGBUS);
1108 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1109
1110 if (sigsetjmp(sigjump, 1)) {
1111 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1112 exit(1);
1113 }
1114
1115 /* MAP_POPULATE silently ignores failures */
1116 for (i = 0; i < (memory/hpagesize); i++) {
1117 memset(area + (hpagesize*i), 0, 1);
1118 }
1119
1120 ret = sigaction(SIGBUS, &oldact, NULL);
1121 if (ret) {
1122 perror("file_ram_alloc: failed to reinstall signal handler");
1123 exit(1);
1124 }
1125
1126 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1127 }
1128
1129 block->fd = fd;
1130 return area;
1131
1132 error:
1133 if (mem_prealloc) {
1134 exit(1);
1135 }
1136 return NULL;
1137 }
1138 #else
1139 static void *file_ram_alloc(RAMBlock *block,
1140 ram_addr_t memory,
1141 const char *path)
1142 {
1143 fprintf(stderr, "-mem-path not supported on this host\n");
1144 exit(1);
1145 }
1146 #endif
1147
1148 static ram_addr_t find_ram_offset(ram_addr_t size)
1149 {
1150 RAMBlock *block, *next_block;
1151 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1152
1153 assert(size != 0); /* it would hand out same offset multiple times */
1154
1155 if (QTAILQ_EMPTY(&ram_list.blocks))
1156 return 0;
1157
1158 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1159 ram_addr_t end, next = RAM_ADDR_MAX;
1160
1161 end = block->offset + block->length;
1162
1163 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1164 if (next_block->offset >= end) {
1165 next = MIN(next, next_block->offset);
1166 }
1167 }
1168 if (next - end >= size && next - end < mingap) {
1169 offset = end;
1170 mingap = next - end;
1171 }
1172 }
1173
1174 if (offset == RAM_ADDR_MAX) {
1175 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1176 (uint64_t)size);
1177 abort();
1178 }
1179
1180 return offset;
1181 }
1182
1183 ram_addr_t last_ram_offset(void)
1184 {
1185 RAMBlock *block;
1186 ram_addr_t last = 0;
1187
1188 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1189 last = MAX(last, block->offset + block->length);
1190
1191 return last;
1192 }
1193
1194 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1195 {
1196 int ret;
1197
1198 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1199 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1200 "dump-guest-core", true)) {
1201 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1202 if (ret) {
1203 perror("qemu_madvise");
1204 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1205 "but dump_guest_core=off specified\n");
1206 }
1207 }
1208 }
1209
1210 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1211 {
1212 RAMBlock *new_block, *block;
1213
1214 new_block = NULL;
1215 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1216 if (block->offset == addr) {
1217 new_block = block;
1218 break;
1219 }
1220 }
1221 assert(new_block);
1222 assert(!new_block->idstr[0]);
1223
1224 if (dev) {
1225 char *id = qdev_get_dev_path(dev);
1226 if (id) {
1227 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1228 g_free(id);
1229 }
1230 }
1231 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1232
1233 /* This assumes the iothread lock is taken here too. */
1234 qemu_mutex_lock_ramlist();
1235 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1236 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1237 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1238 new_block->idstr);
1239 abort();
1240 }
1241 }
1242 qemu_mutex_unlock_ramlist();
1243 }
1244
1245 static int memory_try_enable_merging(void *addr, size_t len)
1246 {
1247 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1248 /* disabled by the user */
1249 return 0;
1250 }
1251
1252 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1253 }
1254
1255 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1256 MemoryRegion *mr)
1257 {
1258 RAMBlock *block, *new_block;
1259 ram_addr_t old_ram_size, new_ram_size;
1260
1261 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1262
1263 size = TARGET_PAGE_ALIGN(size);
1264 new_block = g_malloc0(sizeof(*new_block));
1265 new_block->fd = -1;
1266
1267 /* This assumes the iothread lock is taken here too. */
1268 qemu_mutex_lock_ramlist();
1269 new_block->mr = mr;
1270 new_block->offset = find_ram_offset(size);
1271 if (host) {
1272 new_block->host = host;
1273 new_block->flags |= RAM_PREALLOC_MASK;
1274 } else if (xen_enabled()) {
1275 if (mem_path) {
1276 fprintf(stderr, "-mem-path not supported with Xen\n");
1277 exit(1);
1278 }
1279 xen_ram_alloc(new_block->offset, size, mr);
1280 } else {
1281 if (mem_path) {
1282 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1283 /*
1284 * file_ram_alloc() needs to allocate just like
1285 * phys_mem_alloc, but we haven't bothered to provide
1286 * a hook there.
1287 */
1288 fprintf(stderr,
1289 "-mem-path not supported with this accelerator\n");
1290 exit(1);
1291 }
1292 new_block->host = file_ram_alloc(new_block, size, mem_path);
1293 }
1294 if (!new_block->host) {
1295 new_block->host = phys_mem_alloc(size);
1296 if (!new_block->host) {
1297 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1298 new_block->mr->name, strerror(errno));
1299 exit(1);
1300 }
1301 memory_try_enable_merging(new_block->host, size);
1302 }
1303 }
1304 new_block->length = size;
1305
1306 /* Keep the list sorted from biggest to smallest block. */
1307 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1308 if (block->length < new_block->length) {
1309 break;
1310 }
1311 }
1312 if (block) {
1313 QTAILQ_INSERT_BEFORE(block, new_block, next);
1314 } else {
1315 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1316 }
1317 ram_list.mru_block = NULL;
1318
1319 ram_list.version++;
1320 qemu_mutex_unlock_ramlist();
1321
1322 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1323
1324 if (new_ram_size > old_ram_size) {
1325 int i;
1326 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1327 ram_list.dirty_memory[i] =
1328 bitmap_zero_extend(ram_list.dirty_memory[i],
1329 old_ram_size, new_ram_size);
1330 }
1331 }
1332 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1333
1334 qemu_ram_setup_dump(new_block->host, size);
1335 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1336 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1337
1338 if (kvm_enabled())
1339 kvm_setup_guest_memory(new_block->host, size);
1340
1341 return new_block->offset;
1342 }
1343
1344 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1345 {
1346 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1347 }
1348
1349 void qemu_ram_free_from_ptr(ram_addr_t addr)
1350 {
1351 RAMBlock *block;
1352
1353 /* This assumes the iothread lock is taken here too. */
1354 qemu_mutex_lock_ramlist();
1355 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1356 if (addr == block->offset) {
1357 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1358 ram_list.mru_block = NULL;
1359 ram_list.version++;
1360 g_free(block);
1361 break;
1362 }
1363 }
1364 qemu_mutex_unlock_ramlist();
1365 }
1366
1367 void qemu_ram_free(ram_addr_t addr)
1368 {
1369 RAMBlock *block;
1370
1371 /* This assumes the iothread lock is taken here too. */
1372 qemu_mutex_lock_ramlist();
1373 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1374 if (addr == block->offset) {
1375 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1376 ram_list.mru_block = NULL;
1377 ram_list.version++;
1378 if (block->flags & RAM_PREALLOC_MASK) {
1379 ;
1380 } else if (xen_enabled()) {
1381 xen_invalidate_map_cache_entry(block->host);
1382 #ifndef _WIN32
1383 } else if (block->fd >= 0) {
1384 munmap(block->host, block->length);
1385 close(block->fd);
1386 #endif
1387 } else {
1388 qemu_anon_ram_free(block->host, block->length);
1389 }
1390 g_free(block);
1391 break;
1392 }
1393 }
1394 qemu_mutex_unlock_ramlist();
1395
1396 }
1397
1398 #ifndef _WIN32
1399 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1400 {
1401 RAMBlock *block;
1402 ram_addr_t offset;
1403 int flags;
1404 void *area, *vaddr;
1405
1406 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1407 offset = addr - block->offset;
1408 if (offset < block->length) {
1409 vaddr = block->host + offset;
1410 if (block->flags & RAM_PREALLOC_MASK) {
1411 ;
1412 } else if (xen_enabled()) {
1413 abort();
1414 } else {
1415 flags = MAP_FIXED;
1416 munmap(vaddr, length);
1417 if (block->fd >= 0) {
1418 #ifdef MAP_POPULATE
1419 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1420 MAP_PRIVATE;
1421 #else
1422 flags |= MAP_PRIVATE;
1423 #endif
1424 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1425 flags, block->fd, offset);
1426 } else {
1427 /*
1428 * Remap needs to match alloc. Accelerators that
1429 * set phys_mem_alloc never remap. If they did,
1430 * we'd need a remap hook here.
1431 */
1432 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1433
1434 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1435 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1436 flags, -1, 0);
1437 }
1438 if (area != vaddr) {
1439 fprintf(stderr, "Could not remap addr: "
1440 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1441 length, addr);
1442 exit(1);
1443 }
1444 memory_try_enable_merging(vaddr, length);
1445 qemu_ram_setup_dump(vaddr, length);
1446 }
1447 return;
1448 }
1449 }
1450 }
1451 #endif /* !_WIN32 */
1452
1453 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1454 With the exception of the softmmu code in this file, this should
1455 only be used for local memory (e.g. video ram) that the device owns,
1456 and knows it isn't going to access beyond the end of the block.
1457
1458 It should not be used for general purpose DMA.
1459 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1460 */
1461 void *qemu_get_ram_ptr(ram_addr_t addr)
1462 {
1463 RAMBlock *block = qemu_get_ram_block(addr);
1464
1465 if (xen_enabled()) {
1466 /* We need to check if the requested address is in the RAM
1467 * because we don't want to map the entire memory in QEMU.
1468 * In that case just map until the end of the page.
1469 */
1470 if (block->offset == 0) {
1471 return xen_map_cache(addr, 0, 0);
1472 } else if (block->host == NULL) {
1473 block->host =
1474 xen_map_cache(block->offset, block->length, 1);
1475 }
1476 }
1477 return block->host + (addr - block->offset);
1478 }
1479
1480 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1481 * but takes a size argument */
1482 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1483 {
1484 if (*size == 0) {
1485 return NULL;
1486 }
1487 if (xen_enabled()) {
1488 return xen_map_cache(addr, *size, 1);
1489 } else {
1490 RAMBlock *block;
1491
1492 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1493 if (addr - block->offset < block->length) {
1494 if (addr - block->offset + *size > block->length)
1495 *size = block->length - addr + block->offset;
1496 return block->host + (addr - block->offset);
1497 }
1498 }
1499
1500 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1501 abort();
1502 }
1503 }
1504
1505 /* Some of the softmmu routines need to translate from a host pointer
1506 (typically a TLB entry) back to a ram offset. */
1507 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1508 {
1509 RAMBlock *block;
1510 uint8_t *host = ptr;
1511
1512 if (xen_enabled()) {
1513 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1514 return qemu_get_ram_block(*ram_addr)->mr;
1515 }
1516
1517 block = ram_list.mru_block;
1518 if (block && block->host && host - block->host < block->length) {
1519 goto found;
1520 }
1521
1522 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1523 /* This case append when the block is not mapped. */
1524 if (block->host == NULL) {
1525 continue;
1526 }
1527 if (host - block->host < block->length) {
1528 goto found;
1529 }
1530 }
1531
1532 return NULL;
1533
1534 found:
1535 *ram_addr = block->offset + (host - block->host);
1536 return block->mr;
1537 }
1538
1539 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1540 uint64_t val, unsigned size)
1541 {
1542 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1543 tb_invalidate_phys_page_fast(ram_addr, size);
1544 }
1545 switch (size) {
1546 case 1:
1547 stb_p(qemu_get_ram_ptr(ram_addr), val);
1548 break;
1549 case 2:
1550 stw_p(qemu_get_ram_ptr(ram_addr), val);
1551 break;
1552 case 4:
1553 stl_p(qemu_get_ram_ptr(ram_addr), val);
1554 break;
1555 default:
1556 abort();
1557 }
1558 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1559 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1560 /* we remove the notdirty callback only if the code has been
1561 flushed */
1562 if (!cpu_physical_memory_is_clean(ram_addr)) {
1563 CPUArchState *env = current_cpu->env_ptr;
1564 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1565 }
1566 }
1567
1568 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1569 unsigned size, bool is_write)
1570 {
1571 return is_write;
1572 }
1573
1574 static const MemoryRegionOps notdirty_mem_ops = {
1575 .write = notdirty_mem_write,
1576 .valid.accepts = notdirty_mem_accepts,
1577 .endianness = DEVICE_NATIVE_ENDIAN,
1578 };
1579
1580 /* Generate a debug exception if a watchpoint has been hit. */
1581 static void check_watchpoint(int offset, int len_mask, int flags)
1582 {
1583 CPUState *cpu = current_cpu;
1584 CPUArchState *env = cpu->env_ptr;
1585 target_ulong pc, cs_base;
1586 target_ulong vaddr;
1587 CPUWatchpoint *wp;
1588 int cpu_flags;
1589
1590 if (cpu->watchpoint_hit) {
1591 /* We re-entered the check after replacing the TB. Now raise
1592 * the debug interrupt so that is will trigger after the
1593 * current instruction. */
1594 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1595 return;
1596 }
1597 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1598 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1599 if ((vaddr == (wp->vaddr & len_mask) ||
1600 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1601 wp->flags |= BP_WATCHPOINT_HIT;
1602 if (!cpu->watchpoint_hit) {
1603 cpu->watchpoint_hit = wp;
1604 tb_check_watchpoint(cpu);
1605 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1606 cpu->exception_index = EXCP_DEBUG;
1607 cpu_loop_exit(cpu);
1608 } else {
1609 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1610 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1611 cpu_resume_from_signal(env, NULL);
1612 }
1613 }
1614 } else {
1615 wp->flags &= ~BP_WATCHPOINT_HIT;
1616 }
1617 }
1618 }
1619
1620 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1621 so these check for a hit then pass through to the normal out-of-line
1622 phys routines. */
1623 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1624 unsigned size)
1625 {
1626 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1627 switch (size) {
1628 case 1: return ldub_phys(&address_space_memory, addr);
1629 case 2: return lduw_phys(&address_space_memory, addr);
1630 case 4: return ldl_phys(&address_space_memory, addr);
1631 default: abort();
1632 }
1633 }
1634
1635 static void watch_mem_write(void *opaque, hwaddr addr,
1636 uint64_t val, unsigned size)
1637 {
1638 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1639 switch (size) {
1640 case 1:
1641 stb_phys(&address_space_memory, addr, val);
1642 break;
1643 case 2:
1644 stw_phys(&address_space_memory, addr, val);
1645 break;
1646 case 4:
1647 stl_phys(&address_space_memory, addr, val);
1648 break;
1649 default: abort();
1650 }
1651 }
1652
1653 static const MemoryRegionOps watch_mem_ops = {
1654 .read = watch_mem_read,
1655 .write = watch_mem_write,
1656 .endianness = DEVICE_NATIVE_ENDIAN,
1657 };
1658
1659 static uint64_t subpage_read(void *opaque, hwaddr addr,
1660 unsigned len)
1661 {
1662 subpage_t *subpage = opaque;
1663 uint8_t buf[4];
1664
1665 #if defined(DEBUG_SUBPAGE)
1666 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1667 subpage, len, addr);
1668 #endif
1669 address_space_read(subpage->as, addr + subpage->base, buf, len);
1670 switch (len) {
1671 case 1:
1672 return ldub_p(buf);
1673 case 2:
1674 return lduw_p(buf);
1675 case 4:
1676 return ldl_p(buf);
1677 default:
1678 abort();
1679 }
1680 }
1681
1682 static void subpage_write(void *opaque, hwaddr addr,
1683 uint64_t value, unsigned len)
1684 {
1685 subpage_t *subpage = opaque;
1686 uint8_t buf[4];
1687
1688 #if defined(DEBUG_SUBPAGE)
1689 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1690 " value %"PRIx64"\n",
1691 __func__, subpage, len, addr, value);
1692 #endif
1693 switch (len) {
1694 case 1:
1695 stb_p(buf, value);
1696 break;
1697 case 2:
1698 stw_p(buf, value);
1699 break;
1700 case 4:
1701 stl_p(buf, value);
1702 break;
1703 default:
1704 abort();
1705 }
1706 address_space_write(subpage->as, addr + subpage->base, buf, len);
1707 }
1708
1709 static bool subpage_accepts(void *opaque, hwaddr addr,
1710 unsigned len, bool is_write)
1711 {
1712 subpage_t *subpage = opaque;
1713 #if defined(DEBUG_SUBPAGE)
1714 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1715 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1716 #endif
1717
1718 return address_space_access_valid(subpage->as, addr + subpage->base,
1719 len, is_write);
1720 }
1721
1722 static const MemoryRegionOps subpage_ops = {
1723 .read = subpage_read,
1724 .write = subpage_write,
1725 .valid.accepts = subpage_accepts,
1726 .endianness = DEVICE_NATIVE_ENDIAN,
1727 };
1728
1729 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1730 uint16_t section)
1731 {
1732 int idx, eidx;
1733
1734 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1735 return -1;
1736 idx = SUBPAGE_IDX(start);
1737 eidx = SUBPAGE_IDX(end);
1738 #if defined(DEBUG_SUBPAGE)
1739 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1740 __func__, mmio, start, end, idx, eidx, section);
1741 #endif
1742 for (; idx <= eidx; idx++) {
1743 mmio->sub_section[idx] = section;
1744 }
1745
1746 return 0;
1747 }
1748
1749 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1750 {
1751 subpage_t *mmio;
1752
1753 mmio = g_malloc0(sizeof(subpage_t));
1754
1755 mmio->as = as;
1756 mmio->base = base;
1757 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1758 "subpage", TARGET_PAGE_SIZE);
1759 mmio->iomem.subpage = true;
1760 #if defined(DEBUG_SUBPAGE)
1761 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1762 mmio, base, TARGET_PAGE_SIZE);
1763 #endif
1764 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1765
1766 return mmio;
1767 }
1768
1769 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1770 {
1771 MemoryRegionSection section = {
1772 .address_space = &address_space_memory,
1773 .mr = mr,
1774 .offset_within_address_space = 0,
1775 .offset_within_region = 0,
1776 .size = int128_2_64(),
1777 };
1778
1779 return phys_section_add(map, &section);
1780 }
1781
1782 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1783 {
1784 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1785 }
1786
1787 static void io_mem_init(void)
1788 {
1789 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1790 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1791 "unassigned", UINT64_MAX);
1792 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1793 "notdirty", UINT64_MAX);
1794 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1795 "watch", UINT64_MAX);
1796 }
1797
1798 static void mem_begin(MemoryListener *listener)
1799 {
1800 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1801 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1802 uint16_t n;
1803
1804 n = dummy_section(&d->map, &io_mem_unassigned);
1805 assert(n == PHYS_SECTION_UNASSIGNED);
1806 n = dummy_section(&d->map, &io_mem_notdirty);
1807 assert(n == PHYS_SECTION_NOTDIRTY);
1808 n = dummy_section(&d->map, &io_mem_rom);
1809 assert(n == PHYS_SECTION_ROM);
1810 n = dummy_section(&d->map, &io_mem_watch);
1811 assert(n == PHYS_SECTION_WATCH);
1812
1813 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1814 d->as = as;
1815 as->next_dispatch = d;
1816 }
1817
1818 static void mem_commit(MemoryListener *listener)
1819 {
1820 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1821 AddressSpaceDispatch *cur = as->dispatch;
1822 AddressSpaceDispatch *next = as->next_dispatch;
1823
1824 phys_page_compact_all(next, next->map.nodes_nb);
1825
1826 as->dispatch = next;
1827
1828 if (cur) {
1829 phys_sections_free(&cur->map);
1830 g_free(cur);
1831 }
1832 }
1833
1834 static void tcg_commit(MemoryListener *listener)
1835 {
1836 CPUState *cpu;
1837
1838 /* since each CPU stores ram addresses in its TLB cache, we must
1839 reset the modified entries */
1840 /* XXX: slow ! */
1841 CPU_FOREACH(cpu) {
1842 CPUArchState *env = cpu->env_ptr;
1843
1844 /* FIXME: Disentangle the cpu.h circular files deps so we can
1845 directly get the right CPU from listener. */
1846 if (cpu->tcg_as_listener != listener) {
1847 continue;
1848 }
1849 tlb_flush(env, 1);
1850 }
1851 }
1852
1853 static void core_log_global_start(MemoryListener *listener)
1854 {
1855 cpu_physical_memory_set_dirty_tracking(true);
1856 }
1857
1858 static void core_log_global_stop(MemoryListener *listener)
1859 {
1860 cpu_physical_memory_set_dirty_tracking(false);
1861 }
1862
1863 static MemoryListener core_memory_listener = {
1864 .log_global_start = core_log_global_start,
1865 .log_global_stop = core_log_global_stop,
1866 .priority = 1,
1867 };
1868
1869 void address_space_init_dispatch(AddressSpace *as)
1870 {
1871 as->dispatch = NULL;
1872 as->dispatch_listener = (MemoryListener) {
1873 .begin = mem_begin,
1874 .commit = mem_commit,
1875 .region_add = mem_add,
1876 .region_nop = mem_add,
1877 .priority = 0,
1878 };
1879 memory_listener_register(&as->dispatch_listener, as);
1880 }
1881
1882 void address_space_destroy_dispatch(AddressSpace *as)
1883 {
1884 AddressSpaceDispatch *d = as->dispatch;
1885
1886 memory_listener_unregister(&as->dispatch_listener);
1887 g_free(d);
1888 as->dispatch = NULL;
1889 }
1890
1891 static void memory_map_init(void)
1892 {
1893 system_memory = g_malloc(sizeof(*system_memory));
1894
1895 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1896 address_space_init(&address_space_memory, system_memory, "memory");
1897
1898 system_io = g_malloc(sizeof(*system_io));
1899 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1900 65536);
1901 address_space_init(&address_space_io, system_io, "I/O");
1902
1903 memory_listener_register(&core_memory_listener, &address_space_memory);
1904 }
1905
1906 MemoryRegion *get_system_memory(void)
1907 {
1908 return system_memory;
1909 }
1910
1911 MemoryRegion *get_system_io(void)
1912 {
1913 return system_io;
1914 }
1915
1916 #endif /* !defined(CONFIG_USER_ONLY) */
1917
1918 /* physical memory access (slow version, mainly for debug) */
1919 #if defined(CONFIG_USER_ONLY)
1920 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1921 uint8_t *buf, int len, int is_write)
1922 {
1923 int l, flags;
1924 target_ulong page;
1925 void * p;
1926
1927 while (len > 0) {
1928 page = addr & TARGET_PAGE_MASK;
1929 l = (page + TARGET_PAGE_SIZE) - addr;
1930 if (l > len)
1931 l = len;
1932 flags = page_get_flags(page);
1933 if (!(flags & PAGE_VALID))
1934 return -1;
1935 if (is_write) {
1936 if (!(flags & PAGE_WRITE))
1937 return -1;
1938 /* XXX: this code should not depend on lock_user */
1939 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1940 return -1;
1941 memcpy(p, buf, l);
1942 unlock_user(p, addr, l);
1943 } else {
1944 if (!(flags & PAGE_READ))
1945 return -1;
1946 /* XXX: this code should not depend on lock_user */
1947 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1948 return -1;
1949 memcpy(buf, p, l);
1950 unlock_user(p, addr, 0);
1951 }
1952 len -= l;
1953 buf += l;
1954 addr += l;
1955 }
1956 return 0;
1957 }
1958
1959 #else
1960
1961 static void invalidate_and_set_dirty(hwaddr addr,
1962 hwaddr length)
1963 {
1964 if (cpu_physical_memory_is_clean(addr)) {
1965 /* invalidate code */
1966 tb_invalidate_phys_page_range(addr, addr + length, 0);
1967 /* set dirty bit */
1968 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1969 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1970 }
1971 xen_modified_memory(addr, length);
1972 }
1973
1974 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1975 {
1976 unsigned access_size_max = mr->ops->valid.max_access_size;
1977
1978 /* Regions are assumed to support 1-4 byte accesses unless
1979 otherwise specified. */
1980 if (access_size_max == 0) {
1981 access_size_max = 4;
1982 }
1983
1984 /* Bound the maximum access by the alignment of the address. */
1985 if (!mr->ops->impl.unaligned) {
1986 unsigned align_size_max = addr & -addr;
1987 if (align_size_max != 0 && align_size_max < access_size_max) {
1988 access_size_max = align_size_max;
1989 }
1990 }
1991
1992 /* Don't attempt accesses larger than the maximum. */
1993 if (l > access_size_max) {
1994 l = access_size_max;
1995 }
1996 if (l & (l - 1)) {
1997 l = 1 << (qemu_fls(l) - 1);
1998 }
1999
2000 return l;
2001 }
2002
2003 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2004 int len, bool is_write)
2005 {
2006 hwaddr l;
2007 uint8_t *ptr;
2008 uint64_t val;
2009 hwaddr addr1;
2010 MemoryRegion *mr;
2011 bool error = false;
2012
2013 while (len > 0) {
2014 l = len;
2015 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2016
2017 if (is_write) {
2018 if (!memory_access_is_direct(mr, is_write)) {
2019 l = memory_access_size(mr, l, addr1);
2020 /* XXX: could force current_cpu to NULL to avoid
2021 potential bugs */
2022 switch (l) {
2023 case 8:
2024 /* 64 bit write access */
2025 val = ldq_p(buf);
2026 error |= io_mem_write(mr, addr1, val, 8);
2027 break;
2028 case 4:
2029 /* 32 bit write access */
2030 val = ldl_p(buf);
2031 error |= io_mem_write(mr, addr1, val, 4);
2032 break;
2033 case 2:
2034 /* 16 bit write access */
2035 val = lduw_p(buf);
2036 error |= io_mem_write(mr, addr1, val, 2);
2037 break;
2038 case 1:
2039 /* 8 bit write access */
2040 val = ldub_p(buf);
2041 error |= io_mem_write(mr, addr1, val, 1);
2042 break;
2043 default:
2044 abort();
2045 }
2046 } else {
2047 addr1 += memory_region_get_ram_addr(mr);
2048 /* RAM case */
2049 ptr = qemu_get_ram_ptr(addr1);
2050 memcpy(ptr, buf, l);
2051 invalidate_and_set_dirty(addr1, l);
2052 }
2053 } else {
2054 if (!memory_access_is_direct(mr, is_write)) {
2055 /* I/O case */
2056 l = memory_access_size(mr, l, addr1);
2057 switch (l) {
2058 case 8:
2059 /* 64 bit read access */
2060 error |= io_mem_read(mr, addr1, &val, 8);
2061 stq_p(buf, val);
2062 break;
2063 case 4:
2064 /* 32 bit read access */
2065 error |= io_mem_read(mr, addr1, &val, 4);
2066 stl_p(buf, val);
2067 break;
2068 case 2:
2069 /* 16 bit read access */
2070 error |= io_mem_read(mr, addr1, &val, 2);
2071 stw_p(buf, val);
2072 break;
2073 case 1:
2074 /* 8 bit read access */
2075 error |= io_mem_read(mr, addr1, &val, 1);
2076 stb_p(buf, val);
2077 break;
2078 default:
2079 abort();
2080 }
2081 } else {
2082 /* RAM case */
2083 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2084 memcpy(buf, ptr, l);
2085 }
2086 }
2087 len -= l;
2088 buf += l;
2089 addr += l;
2090 }
2091
2092 return error;
2093 }
2094
2095 bool address_space_write(AddressSpace *as, hwaddr addr,
2096 const uint8_t *buf, int len)
2097 {
2098 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2099 }
2100
2101 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2102 {
2103 return address_space_rw(as, addr, buf, len, false);
2104 }
2105
2106
2107 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2108 int len, int is_write)
2109 {
2110 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2111 }
2112
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2117
2118 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2119 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2120 {
2121 hwaddr l;
2122 uint8_t *ptr;
2123 hwaddr addr1;
2124 MemoryRegion *mr;
2125
2126 while (len > 0) {
2127 l = len;
2128 mr = address_space_translate(as, addr, &addr1, &l, true);
2129
2130 if (!(memory_region_is_ram(mr) ||
2131 memory_region_is_romd(mr))) {
2132 /* do nothing */
2133 } else {
2134 addr1 += memory_region_get_ram_addr(mr);
2135 /* ROM/RAM case */
2136 ptr = qemu_get_ram_ptr(addr1);
2137 switch (type) {
2138 case WRITE_DATA:
2139 memcpy(ptr, buf, l);
2140 invalidate_and_set_dirty(addr1, l);
2141 break;
2142 case FLUSH_CACHE:
2143 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2144 break;
2145 }
2146 }
2147 len -= l;
2148 buf += l;
2149 addr += l;
2150 }
2151 }
2152
2153 /* used for ROM loading : can write in RAM and ROM */
2154 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2155 const uint8_t *buf, int len)
2156 {
2157 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2158 }
2159
2160 void cpu_flush_icache_range(hwaddr start, int len)
2161 {
2162 /*
2163 * This function should do the same thing as an icache flush that was
2164 * triggered from within the guest. For TCG we are always cache coherent,
2165 * so there is no need to flush anything. For KVM / Xen we need to flush
2166 * the host's instruction cache at least.
2167 */
2168 if (tcg_enabled()) {
2169 return;
2170 }
2171
2172 cpu_physical_memory_write_rom_internal(&address_space_memory,
2173 start, NULL, len, FLUSH_CACHE);
2174 }
2175
2176 typedef struct {
2177 MemoryRegion *mr;
2178 void *buffer;
2179 hwaddr addr;
2180 hwaddr len;
2181 } BounceBuffer;
2182
2183 static BounceBuffer bounce;
2184
2185 typedef struct MapClient {
2186 void *opaque;
2187 void (*callback)(void *opaque);
2188 QLIST_ENTRY(MapClient) link;
2189 } MapClient;
2190
2191 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2192 = QLIST_HEAD_INITIALIZER(map_client_list);
2193
2194 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2195 {
2196 MapClient *client = g_malloc(sizeof(*client));
2197
2198 client->opaque = opaque;
2199 client->callback = callback;
2200 QLIST_INSERT_HEAD(&map_client_list, client, link);
2201 return client;
2202 }
2203
2204 static void cpu_unregister_map_client(void *_client)
2205 {
2206 MapClient *client = (MapClient *)_client;
2207
2208 QLIST_REMOVE(client, link);
2209 g_free(client);
2210 }
2211
2212 static void cpu_notify_map_clients(void)
2213 {
2214 MapClient *client;
2215
2216 while (!QLIST_EMPTY(&map_client_list)) {
2217 client = QLIST_FIRST(&map_client_list);
2218 client->callback(client->opaque);
2219 cpu_unregister_map_client(client);
2220 }
2221 }
2222
2223 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2224 {
2225 MemoryRegion *mr;
2226 hwaddr l, xlat;
2227
2228 while (len > 0) {
2229 l = len;
2230 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2231 if (!memory_access_is_direct(mr, is_write)) {
2232 l = memory_access_size(mr, l, addr);
2233 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2234 return false;
2235 }
2236 }
2237
2238 len -= l;
2239 addr += l;
2240 }
2241 return true;
2242 }
2243
2244 /* Map a physical memory region into a host virtual address.
2245 * May map a subset of the requested range, given by and returned in *plen.
2246 * May return NULL if resources needed to perform the mapping are exhausted.
2247 * Use only for reads OR writes - not for read-modify-write operations.
2248 * Use cpu_register_map_client() to know when retrying the map operation is
2249 * likely to succeed.
2250 */
2251 void *address_space_map(AddressSpace *as,
2252 hwaddr addr,
2253 hwaddr *plen,
2254 bool is_write)
2255 {
2256 hwaddr len = *plen;
2257 hwaddr done = 0;
2258 hwaddr l, xlat, base;
2259 MemoryRegion *mr, *this_mr;
2260 ram_addr_t raddr;
2261
2262 if (len == 0) {
2263 return NULL;
2264 }
2265
2266 l = len;
2267 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2268 if (!memory_access_is_direct(mr, is_write)) {
2269 if (bounce.buffer) {
2270 return NULL;
2271 }
2272 /* Avoid unbounded allocations */
2273 l = MIN(l, TARGET_PAGE_SIZE);
2274 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2275 bounce.addr = addr;
2276 bounce.len = l;
2277
2278 memory_region_ref(mr);
2279 bounce.mr = mr;
2280 if (!is_write) {
2281 address_space_read(as, addr, bounce.buffer, l);
2282 }
2283
2284 *plen = l;
2285 return bounce.buffer;
2286 }
2287
2288 base = xlat;
2289 raddr = memory_region_get_ram_addr(mr);
2290
2291 for (;;) {
2292 len -= l;
2293 addr += l;
2294 done += l;
2295 if (len == 0) {
2296 break;
2297 }
2298
2299 l = len;
2300 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2301 if (this_mr != mr || xlat != base + done) {
2302 break;
2303 }
2304 }
2305
2306 memory_region_ref(mr);
2307 *plen = done;
2308 return qemu_ram_ptr_length(raddr + base, plen);
2309 }
2310
2311 /* Unmaps a memory region previously mapped by address_space_map().
2312 * Will also mark the memory as dirty if is_write == 1. access_len gives
2313 * the amount of memory that was actually read or written by the caller.
2314 */
2315 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2316 int is_write, hwaddr access_len)
2317 {
2318 if (buffer != bounce.buffer) {
2319 MemoryRegion *mr;
2320 ram_addr_t addr1;
2321
2322 mr = qemu_ram_addr_from_host(buffer, &addr1);
2323 assert(mr != NULL);
2324 if (is_write) {
2325 while (access_len) {
2326 unsigned l;
2327 l = TARGET_PAGE_SIZE;
2328 if (l > access_len)
2329 l = access_len;
2330 invalidate_and_set_dirty(addr1, l);
2331 addr1 += l;
2332 access_len -= l;
2333 }
2334 }
2335 if (xen_enabled()) {
2336 xen_invalidate_map_cache_entry(buffer);
2337 }
2338 memory_region_unref(mr);
2339 return;
2340 }
2341 if (is_write) {
2342 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2343 }
2344 qemu_vfree(bounce.buffer);
2345 bounce.buffer = NULL;
2346 memory_region_unref(bounce.mr);
2347 cpu_notify_map_clients();
2348 }
2349
2350 void *cpu_physical_memory_map(hwaddr addr,
2351 hwaddr *plen,
2352 int is_write)
2353 {
2354 return address_space_map(&address_space_memory, addr, plen, is_write);
2355 }
2356
2357 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2358 int is_write, hwaddr access_len)
2359 {
2360 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2361 }
2362
2363 /* warning: addr must be aligned */
2364 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2365 enum device_endian endian)
2366 {
2367 uint8_t *ptr;
2368 uint64_t val;
2369 MemoryRegion *mr;
2370 hwaddr l = 4;
2371 hwaddr addr1;
2372
2373 mr = address_space_translate(as, addr, &addr1, &l, false);
2374 if (l < 4 || !memory_access_is_direct(mr, false)) {
2375 /* I/O case */
2376 io_mem_read(mr, addr1, &val, 4);
2377 #if defined(TARGET_WORDS_BIGENDIAN)
2378 if (endian == DEVICE_LITTLE_ENDIAN) {
2379 val = bswap32(val);
2380 }
2381 #else
2382 if (endian == DEVICE_BIG_ENDIAN) {
2383 val = bswap32(val);
2384 }
2385 #endif
2386 } else {
2387 /* RAM case */
2388 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2389 & TARGET_PAGE_MASK)
2390 + addr1);
2391 switch (endian) {
2392 case DEVICE_LITTLE_ENDIAN:
2393 val = ldl_le_p(ptr);
2394 break;
2395 case DEVICE_BIG_ENDIAN:
2396 val = ldl_be_p(ptr);
2397 break;
2398 default:
2399 val = ldl_p(ptr);
2400 break;
2401 }
2402 }
2403 return val;
2404 }
2405
2406 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2407 {
2408 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2409 }
2410
2411 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2412 {
2413 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2414 }
2415
2416 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2417 {
2418 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2419 }
2420
2421 /* warning: addr must be aligned */
2422 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2423 enum device_endian endian)
2424 {
2425 uint8_t *ptr;
2426 uint64_t val;
2427 MemoryRegion *mr;
2428 hwaddr l = 8;
2429 hwaddr addr1;
2430
2431 mr = address_space_translate(as, addr, &addr1, &l,
2432 false);
2433 if (l < 8 || !memory_access_is_direct(mr, false)) {
2434 /* I/O case */
2435 io_mem_read(mr, addr1, &val, 8);
2436 #if defined(TARGET_WORDS_BIGENDIAN)
2437 if (endian == DEVICE_LITTLE_ENDIAN) {
2438 val = bswap64(val);
2439 }
2440 #else
2441 if (endian == DEVICE_BIG_ENDIAN) {
2442 val = bswap64(val);
2443 }
2444 #endif
2445 } else {
2446 /* RAM case */
2447 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2448 & TARGET_PAGE_MASK)
2449 + addr1);
2450 switch (endian) {
2451 case DEVICE_LITTLE_ENDIAN:
2452 val = ldq_le_p(ptr);
2453 break;
2454 case DEVICE_BIG_ENDIAN:
2455 val = ldq_be_p(ptr);
2456 break;
2457 default:
2458 val = ldq_p(ptr);
2459 break;
2460 }
2461 }
2462 return val;
2463 }
2464
2465 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2466 {
2467 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2468 }
2469
2470 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2471 {
2472 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2473 }
2474
2475 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2476 {
2477 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2478 }
2479
2480 /* XXX: optimize */
2481 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2482 {
2483 uint8_t val;
2484 address_space_rw(as, addr, &val, 1, 0);
2485 return val;
2486 }
2487
2488 /* warning: addr must be aligned */
2489 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2490 enum device_endian endian)
2491 {
2492 uint8_t *ptr;
2493 uint64_t val;
2494 MemoryRegion *mr;
2495 hwaddr l = 2;
2496 hwaddr addr1;
2497
2498 mr = address_space_translate(as, addr, &addr1, &l,
2499 false);
2500 if (l < 2 || !memory_access_is_direct(mr, false)) {
2501 /* I/O case */
2502 io_mem_read(mr, addr1, &val, 2);
2503 #if defined(TARGET_WORDS_BIGENDIAN)
2504 if (endian == DEVICE_LITTLE_ENDIAN) {
2505 val = bswap16(val);
2506 }
2507 #else
2508 if (endian == DEVICE_BIG_ENDIAN) {
2509 val = bswap16(val);
2510 }
2511 #endif
2512 } else {
2513 /* RAM case */
2514 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2515 & TARGET_PAGE_MASK)
2516 + addr1);
2517 switch (endian) {
2518 case DEVICE_LITTLE_ENDIAN:
2519 val = lduw_le_p(ptr);
2520 break;
2521 case DEVICE_BIG_ENDIAN:
2522 val = lduw_be_p(ptr);
2523 break;
2524 default:
2525 val = lduw_p(ptr);
2526 break;
2527 }
2528 }
2529 return val;
2530 }
2531
2532 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2533 {
2534 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2535 }
2536
2537 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2538 {
2539 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2540 }
2541
2542 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2543 {
2544 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2545 }
2546
2547 /* warning: addr must be aligned. The ram page is not masked as dirty
2548 and the code inside is not invalidated. It is useful if the dirty
2549 bits are used to track modified PTEs */
2550 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2551 {
2552 uint8_t *ptr;
2553 MemoryRegion *mr;
2554 hwaddr l = 4;
2555 hwaddr addr1;
2556
2557 mr = address_space_translate(as, addr, &addr1, &l,
2558 true);
2559 if (l < 4 || !memory_access_is_direct(mr, true)) {
2560 io_mem_write(mr, addr1, val, 4);
2561 } else {
2562 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2563 ptr = qemu_get_ram_ptr(addr1);
2564 stl_p(ptr, val);
2565
2566 if (unlikely(in_migration)) {
2567 if (cpu_physical_memory_is_clean(addr1)) {
2568 /* invalidate code */
2569 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2570 /* set dirty bit */
2571 cpu_physical_memory_set_dirty_flag(addr1,
2572 DIRTY_MEMORY_MIGRATION);
2573 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2574 }
2575 }
2576 }
2577 }
2578
2579 /* warning: addr must be aligned */
2580 static inline void stl_phys_internal(AddressSpace *as,
2581 hwaddr addr, uint32_t val,
2582 enum device_endian endian)
2583 {
2584 uint8_t *ptr;
2585 MemoryRegion *mr;
2586 hwaddr l = 4;
2587 hwaddr addr1;
2588
2589 mr = address_space_translate(as, addr, &addr1, &l,
2590 true);
2591 if (l < 4 || !memory_access_is_direct(mr, true)) {
2592 #if defined(TARGET_WORDS_BIGENDIAN)
2593 if (endian == DEVICE_LITTLE_ENDIAN) {
2594 val = bswap32(val);
2595 }
2596 #else
2597 if (endian == DEVICE_BIG_ENDIAN) {
2598 val = bswap32(val);
2599 }
2600 #endif
2601 io_mem_write(mr, addr1, val, 4);
2602 } else {
2603 /* RAM case */
2604 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2605 ptr = qemu_get_ram_ptr(addr1);
2606 switch (endian) {
2607 case DEVICE_LITTLE_ENDIAN:
2608 stl_le_p(ptr, val);
2609 break;
2610 case DEVICE_BIG_ENDIAN:
2611 stl_be_p(ptr, val);
2612 break;
2613 default:
2614 stl_p(ptr, val);
2615 break;
2616 }
2617 invalidate_and_set_dirty(addr1, 4);
2618 }
2619 }
2620
2621 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2622 {
2623 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2624 }
2625
2626 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2627 {
2628 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2629 }
2630
2631 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2632 {
2633 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2634 }
2635
2636 /* XXX: optimize */
2637 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2638 {
2639 uint8_t v = val;
2640 address_space_rw(as, addr, &v, 1, 1);
2641 }
2642
2643 /* warning: addr must be aligned */
2644 static inline void stw_phys_internal(AddressSpace *as,
2645 hwaddr addr, uint32_t val,
2646 enum device_endian endian)
2647 {
2648 uint8_t *ptr;
2649 MemoryRegion *mr;
2650 hwaddr l = 2;
2651 hwaddr addr1;
2652
2653 mr = address_space_translate(as, addr, &addr1, &l, true);
2654 if (l < 2 || !memory_access_is_direct(mr, true)) {
2655 #if defined(TARGET_WORDS_BIGENDIAN)
2656 if (endian == DEVICE_LITTLE_ENDIAN) {
2657 val = bswap16(val);
2658 }
2659 #else
2660 if (endian == DEVICE_BIG_ENDIAN) {
2661 val = bswap16(val);
2662 }
2663 #endif
2664 io_mem_write(mr, addr1, val, 2);
2665 } else {
2666 /* RAM case */
2667 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2668 ptr = qemu_get_ram_ptr(addr1);
2669 switch (endian) {
2670 case DEVICE_LITTLE_ENDIAN:
2671 stw_le_p(ptr, val);
2672 break;
2673 case DEVICE_BIG_ENDIAN:
2674 stw_be_p(ptr, val);
2675 break;
2676 default:
2677 stw_p(ptr, val);
2678 break;
2679 }
2680 invalidate_and_set_dirty(addr1, 2);
2681 }
2682 }
2683
2684 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2685 {
2686 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2687 }
2688
2689 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2690 {
2691 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2692 }
2693
2694 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2695 {
2696 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2697 }
2698
2699 /* XXX: optimize */
2700 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2701 {
2702 val = tswap64(val);
2703 address_space_rw(as, addr, (void *) &val, 8, 1);
2704 }
2705
2706 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2707 {
2708 val = cpu_to_le64(val);
2709 address_space_rw(as, addr, (void *) &val, 8, 1);
2710 }
2711
2712 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2713 {
2714 val = cpu_to_be64(val);
2715 address_space_rw(as, addr, (void *) &val, 8, 1);
2716 }
2717
2718 /* virtual memory access for debug (includes writing to ROM) */
2719 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2720 uint8_t *buf, int len, int is_write)
2721 {
2722 int l;
2723 hwaddr phys_addr;
2724 target_ulong page;
2725
2726 while (len > 0) {
2727 page = addr & TARGET_PAGE_MASK;
2728 phys_addr = cpu_get_phys_page_debug(cpu, page);
2729 /* if no physical page mapped, return an error */
2730 if (phys_addr == -1)
2731 return -1;
2732 l = (page + TARGET_PAGE_SIZE) - addr;
2733 if (l > len)
2734 l = len;
2735 phys_addr += (addr & ~TARGET_PAGE_MASK);
2736 if (is_write) {
2737 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2738 } else {
2739 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2740 }
2741 len -= l;
2742 buf += l;
2743 addr += l;
2744 }
2745 return 0;
2746 }
2747 #endif
2748
2749 #if !defined(CONFIG_USER_ONLY)
2750
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
2764
2765 #endif
2766
2767 #ifndef CONFIG_USER_ONLY
2768 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2769 {
2770 MemoryRegion*mr;
2771 hwaddr l = 1;
2772
2773 mr = address_space_translate(&address_space_memory,
2774 phys_addr, &phys_addr, &l, false);
2775
2776 return !(memory_region_is_ram(mr) ||
2777 memory_region_is_romd(mr));
2778 }
2779
2780 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2781 {
2782 RAMBlock *block;
2783
2784 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2785 func(block->host, block->offset, block->length, opaque);
2786 }
2787 }
2788 #endif