memory: move mem_path handling to memory_region_allocate_system_memory
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
/* Flag written by cpu_physical_memory_set_dirty_tracking(). */
static bool in_migration;

/* Global list of RAM blocks; the block queue starts out empty. */
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

/* Root memory regions.
 * NOTE(review): presumably set up by memory_map_init(); its body is not
 * visible in this part of the file -- confirm. */
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

/* Address spaces; new CPUs get address_space_memory in cpu_exec_init(). */
AddressSpace address_space_io;
AddressSpace address_space_memory;

/* Special regions compared against by pointer (see
 * memory_region_is_unassigned() and address_space_translate()). */
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC (1 << 0)
75
76 #endif
77
/* Global queue of CPUs; cpu_exec_init() appends each new CPU to its tail. */
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting. */
int use_icount;
86
87 #if !defined(CONFIG_USER_ONLY)
88
typedef struct PhysPageEntry PhysPageEntry;

/* One entry of the multi-level physical page map, packed into 32 bits. */
struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field: the entry refers to no node. */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
99
/* Size of the L2 (and L3, etc) page tables. */
#define ADDR_SPACE_BITS 64

/* Each table level resolves P_L2_BITS bits of the page index. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to cover the page-index bits of the address
 * space (ADDR_SPACE_BITS - TARGET_PAGE_BITS) in P_L2_BITS steps, rounded up. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One table of the map: P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
109
/* Growable backing storage for the nodes and sections of a page map. */
typedef struct PhysPageMap {
    /* Number of entries of 'sections' in use / allocated. */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    /* Number of entries of 'nodes' in use / allocated. */
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
118
/* Lookup state for one address space: the root map entry, the storage it
 * indexes into, and the address space it belongs to. */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};
127
/* Offset of addr within its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A page whose bytes may belong to different sections. */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    /* Section number for each byte offset of the page; indexed with
     * SUBPAGE_IDX() and used as an index into the dispatch's sections. */
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
135
/* Fixed section numbers.
 * NOTE(review): presumably these must match the order in which the
 * corresponding sections are added to each map -- that code is not visible
 * here; confirm. */
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

/* Defined later in the file; the first two are called from
 * cpu_exec_init_all(). */
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
146 #endif
147
148 #if !defined(CONFIG_USER_ONLY)
149
150 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
151 {
152 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
153 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
154 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
155 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 }
157 }
158
159 static uint32_t phys_map_node_alloc(PhysPageMap *map)
160 {
161 unsigned i;
162 uint32_t ret;
163
164 ret = map->nodes_nb++;
165 assert(ret != PHYS_MAP_NODE_NIL);
166 assert(ret != map->nodes_nb_alloc);
167 for (i = 0; i < P_L2_SIZE; ++i) {
168 map->nodes[ret][i].skip = 1;
169 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
170 }
171 return ret;
172 }
173
/*
 * Recursive worker for phys_page_set().
 *
 * Starting at page index *index, point up to *nb pages at section number
 * @leaf inside the table that @lp refers to at the given @level.  *index
 * and *nb are updated as entries are filled in, so the caller can see how
 * much is left once this table has been exhausted.
 */
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    /* Number of pages covered by a single entry at this level. */
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        /* No table yet below lp: allocate one. */
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            /* Bottom-level tables hold leaves, initially unassigned. */
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    /* First entry of this table that the range touches. */
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* The range covers this whole entry: make it a leaf. */
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            /* Partial coverage: descend one level. */
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
208
209 static void phys_page_set(AddressSpaceDispatch *d,
210 hwaddr index, hwaddr nb,
211 uint16_t leaf)
212 {
213 /* Wildly overreserve - it doesn't matter much. */
214 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
215
216 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
217 }
218
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 *
 * Children that are themselves non-leaf entries are compacted first.
 * @compacted is only handed on to the recursive calls; this function
 * neither reads nor writes it.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    /* Count the populated children and remember the last one seen. */
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    /* NOTE(review): skip is declared as a 6-bit field, yet the bound used
     * here is 1 << 3 -- presumably deliberately conservative; confirm. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
271
/*
 * Compact the map of @d starting from its root entry.  @nodes_nb sizes the
 * bitmap that is handed to phys_page_compact().
 */
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    /* Declared without an initialiser here; it is only passed through. */
    DECLARE_BITMAP(compacted, nodes_nb);

    /* A root that is already a leaf (skip == 0) has nothing to compact. */
    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}
280
/*
 * Look up the section that covers @addr, starting from root entry @lp and
 * using the given @nodes and @sections arrays.
 *
 * Returns &sections[PHYS_SECTION_UNASSIGNED] when the walk reaches an
 * entry with no node, or when the section found does not cover @addr.
 */
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    /* i is the number of levels still below the current entry; each step
     * consumes lp.skip of them (more than one for a compacted entry). */
    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    /* A section whose size has a non-zero high half is accepted without a
     * range check; otherwise addr must lie inside the section. */
    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
304
305 bool memory_region_is_unassigned(MemoryRegion *mr)
306 {
307 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
308 && mr != &io_mem_watch;
309 }
310
311 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
312 hwaddr addr,
313 bool resolve_subpage)
314 {
315 MemoryRegionSection *section;
316 subpage_t *subpage;
317
318 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
319 if (resolve_subpage && section->mr->subpage) {
320 subpage = container_of(section->mr, subpage_t, iomem);
321 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
322 }
323 return section;
324 }
325
326 static MemoryRegionSection *
327 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
328 hwaddr *plen, bool resolve_subpage)
329 {
330 MemoryRegionSection *section;
331 Int128 diff;
332
333 section = address_space_lookup_region(d, addr, resolve_subpage);
334 /* Compute offset within MemoryRegionSection */
335 addr -= section->offset_within_address_space;
336
337 /* Compute offset within MemoryRegion */
338 *xlat = addr + section->offset_within_region;
339
340 diff = int128_sub(section->mr->size, int128_make64(addr));
341 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 return section;
343 }
344
345 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
346 {
347 if (memory_region_is_ram(mr)) {
348 return !(is_write && mr->readonly);
349 }
350 if (memory_region_is_romd(mr)) {
351 return !is_write;
352 }
353
354 return false;
355 }
356
/*
 * Translate @addr in address space @as to a MemoryRegion and an offset
 * within it, following IOMMU regions until a region without iommu_ops is
 * reached.
 *
 * @xlat:     out, translated address
 * @plen:     in/out, length of the access; may be reduced
 * @is_write: selects which IOMMU permission bit is checked
 *
 * Returns the target region, or &io_mem_unassigned when an IOMMU entry
 * does not grant the requested permission.
 *
 * NOTE(review): len is loaded from *plen before the loop and stored back to
 * *plen at the end; the value address_space_translate_internal() writes to
 * *plen inside the loop is never read back here.  Confirm that discarding
 * that clamp is intended.
 */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        /* Keep the low bits of addr, take the rest from the translation. */
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        /* Do not run past the end of the translated range. */
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    /* Under Xen, direct accesses are limited to the end of the page. */
    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        len = MIN(page, len);
    }

    *plen = len;
    *xlat = addr;
    return mr;
}
395
396 MemoryRegionSection *
397 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
398 hwaddr *plen)
399 {
400 MemoryRegionSection *section;
401 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
402
403 assert(!section->mr->iommu_ops);
404 return section;
405 }
406 #endif
407
408 void cpu_exec_init_all(void)
409 {
410 #if !defined(CONFIG_USER_ONLY)
411 qemu_mutex_init(&ram_list.mutex);
412 memory_map_init();
413 io_mem_init();
414 #endif
415 }
416
417 #if !defined(CONFIG_USER_ONLY)
418
419 static int cpu_common_post_load(void *opaque, int version_id)
420 {
421 CPUState *cpu = opaque;
422
423 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
424 version_id is increased. */
425 cpu->interrupt_request &= ~0x01;
426 tlb_flush(cpu, 1);
427
428 return 0;
429 }
430
/*
 * Save/restore description of the CPU state common to all targets: the
 * 'halted' and 'interrupt_request' fields of CPUState.
 * cpu_common_post_load() runs after the fields have been loaded.
 */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
442
443 #endif
444
445 CPUState *qemu_get_cpu(int index)
446 {
447 CPUState *cpu;
448
449 CPU_FOREACH(cpu) {
450 if (cpu->cpu_index == index) {
451 return cpu;
452 }
453 }
454
455 return NULL;
456 }
457
458 #if !defined(CONFIG_USER_ONLY)
459 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
460 {
461 /* We only support one address space per cpu at the moment. */
462 assert(cpu->as == as);
463
464 if (cpu->tcg_as_listener) {
465 memory_listener_unregister(cpu->tcg_as_listener);
466 } else {
467 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
468 }
469 cpu->tcg_as_listener->commit = tcg_commit;
470 memory_listener_register(cpu->tcg_as_listener, as);
471 }
472 #endif
473
/*
 * Common set-up for a new CPU: assign the next cpu_index, initialise its
 * breakpoint and watchpoint queues, append it to the global 'cpus' queue
 * and register its state for save/restore.
 */
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    /* The new index is the number of CPUs already on the queue. */
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    /* Register the common state only when the device has no vmsd. */
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    /* Targets using cpu_save/cpu_load must not also provide a vmsd. */
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}
513
514 #if defined(TARGET_HAS_ICE)
515 #if defined(CONFIG_USER_ONLY)
516 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
517 {
518 tb_invalidate_phys_page_range(pc, pc + 1, 0);
519 }
520 #else
521 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
522 {
523 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
524 if (phys != -1) {
525 tb_invalidate_phys_addr(cpu->as,
526 phys | (pc & ~TARGET_PAGE_MASK));
527 }
528 }
529 #endif
530 #endif /* TARGET_HAS_ICE */
531
532 #if defined(CONFIG_USER_ONLY)
/* User-only stub: does nothing. */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)

{
}
537
/* User-only stub: always fails with -ENOSYS. */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
543 #else
544 /* Add a watchpoint. */
545 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
546 int flags, CPUWatchpoint **watchpoint)
547 {
548 vaddr len_mask = ~(len - 1);
549 CPUWatchpoint *wp;
550
551 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
552 if ((len & (len - 1)) || (addr & ~len_mask) ||
553 len == 0 || len > TARGET_PAGE_SIZE) {
554 error_report("tried to set invalid watchpoint at %"
555 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
556 return -EINVAL;
557 }
558 wp = g_malloc(sizeof(*wp));
559
560 wp->vaddr = addr;
561 wp->len_mask = len_mask;
562 wp->flags = flags;
563
564 /* keep all GDB-injected watchpoints in front */
565 if (flags & BP_GDB) {
566 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
567 } else {
568 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 }
570
571 tlb_flush_page(cpu, addr);
572
573 if (watchpoint)
574 *watchpoint = wp;
575 return 0;
576 }
577
578 /* Remove a specific watchpoint. */
579 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
580 int flags)
581 {
582 vaddr len_mask = ~(len - 1);
583 CPUWatchpoint *wp;
584
585 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
586 if (addr == wp->vaddr && len_mask == wp->len_mask
587 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
588 cpu_watchpoint_remove_by_ref(cpu, wp);
589 return 0;
590 }
591 }
592 return -ENOENT;
593 }
594
595 /* Remove a specific watchpoint by reference. */
596 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
597 {
598 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
599
600 tlb_flush_page(cpu, watchpoint->vaddr);
601
602 g_free(watchpoint);
603 }
604
605 /* Remove all matching watchpoints. */
606 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
607 {
608 CPUWatchpoint *wp, *next;
609
610 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
611 if (wp->flags & mask) {
612 cpu_watchpoint_remove_by_ref(cpu, wp);
613 }
614 }
615 }
616 #endif
617
618 /* Add a breakpoint. */
619 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
620 CPUBreakpoint **breakpoint)
621 {
622 #if defined(TARGET_HAS_ICE)
623 CPUBreakpoint *bp;
624
625 bp = g_malloc(sizeof(*bp));
626
627 bp->pc = pc;
628 bp->flags = flags;
629
630 /* keep all GDB-injected breakpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
633 } else {
634 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
635 }
636
637 breakpoint_invalidate(cpu, pc);
638
639 if (breakpoint) {
640 *breakpoint = bp;
641 }
642 return 0;
643 #else
644 return -ENOSYS;
645 #endif
646 }
647
648 /* Remove a specific breakpoint. */
649 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
650 {
651 #if defined(TARGET_HAS_ICE)
652 CPUBreakpoint *bp;
653
654 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
655 if (bp->pc == pc && bp->flags == flags) {
656 cpu_breakpoint_remove_by_ref(cpu, bp);
657 return 0;
658 }
659 }
660 return -ENOENT;
661 #else
662 return -ENOSYS;
663 #endif
664 }
665
666 /* Remove a specific breakpoint by reference. */
667 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
668 {
669 #if defined(TARGET_HAS_ICE)
670 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
671
672 breakpoint_invalidate(cpu, breakpoint->pc);
673
674 g_free(breakpoint);
675 #endif
676 }
677
678 /* Remove all matching breakpoints. */
679 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
680 {
681 #if defined(TARGET_HAS_ICE)
682 CPUBreakpoint *bp, *next;
683
684 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
685 if (bp->flags & mask) {
686 cpu_breakpoint_remove_by_ref(cpu, bp);
687 }
688 }
689 #endif
690 }
691
692 /* enable or disable single step mode. EXCP_DEBUG is returned by the
693 CPU loop after each instruction */
694 void cpu_single_step(CPUState *cpu, int enabled)
695 {
696 #if defined(TARGET_HAS_ICE)
697 if (cpu->singlestep_enabled != enabled) {
698 cpu->singlestep_enabled = enabled;
699 if (kvm_enabled()) {
700 kvm_update_guest_debug(cpu, 0);
701 } else {
702 /* must flush all the translated code to avoid inconsistencies */
703 /* XXX: only flush what is necessary */
704 CPUArchState *env = cpu->env_ptr;
705 tb_flush(env);
706 }
707 }
708 #endif
709 }
710
711 void cpu_abort(CPUState *cpu, const char *fmt, ...)
712 {
713 va_list ap;
714 va_list ap2;
715
716 va_start(ap, fmt);
717 va_copy(ap2, ap);
718 fprintf(stderr, "qemu: fatal: ");
719 vfprintf(stderr, fmt, ap);
720 fprintf(stderr, "\n");
721 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 if (qemu_log_enabled()) {
723 qemu_log("qemu: fatal: ");
724 qemu_log_vprintf(fmt, ap2);
725 qemu_log("\n");
726 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
727 qemu_log_flush();
728 qemu_log_close();
729 }
730 va_end(ap2);
731 va_end(ap);
732 #if defined(CONFIG_USER_ONLY)
733 {
734 struct sigaction act;
735 sigfillset(&act.sa_mask);
736 act.sa_handler = SIG_DFL;
737 sigaction(SIGABRT, &act, NULL);
738 }
739 #endif
740 abort();
741 }
742
743 #if !defined(CONFIG_USER_ONLY)
744 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
745 {
746 RAMBlock *block;
747
748 /* The list is protected by the iothread lock here. */
749 block = ram_list.mru_block;
750 if (block && addr - block->offset < block->length) {
751 goto found;
752 }
753 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
754 if (addr - block->offset < block->length) {
755 goto found;
756 }
757 }
758
759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
760 abort();
761
762 found:
763 ram_list.mru_block = block;
764 return block;
765 }
766
767 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
768 {
769 ram_addr_t start1;
770 RAMBlock *block;
771 ram_addr_t end;
772
773 end = TARGET_PAGE_ALIGN(start + length);
774 start &= TARGET_PAGE_MASK;
775
776 block = qemu_get_ram_block(start);
777 assert(block == qemu_get_ram_block(end - 1));
778 start1 = (uintptr_t)block->host + (start - block->offset);
779 cpu_tlb_reset_dirty_all(start1, length);
780 }
781
782 /* Note: start and end must be within the same ram block. */
783 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
784 unsigned client)
785 {
786 if (length == 0)
787 return;
788 cpu_physical_memory_clear_dirty_range(start, length, client);
789
790 if (tcg_enabled()) {
791 tlb_reset_dirty_range_all(start, length);
792 }
793 }
794
/* Record whether dirty tracking is enabled in the in_migration flag. */
static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
    in_migration = enable;
}
799
800 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
801 MemoryRegionSection *section,
802 target_ulong vaddr,
803 hwaddr paddr, hwaddr xlat,
804 int prot,
805 target_ulong *address)
806 {
807 hwaddr iotlb;
808 CPUWatchpoint *wp;
809
810 if (memory_region_is_ram(section->mr)) {
811 /* Normal RAM. */
812 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
813 + xlat;
814 if (!section->readonly) {
815 iotlb |= PHYS_SECTION_NOTDIRTY;
816 } else {
817 iotlb |= PHYS_SECTION_ROM;
818 }
819 } else {
820 iotlb = section - section->address_space->dispatch->map.sections;
821 iotlb += xlat;
822 }
823
824 /* Make accesses to pages with watchpoints go via the
825 watchpoint trap routines. */
826 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
827 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
828 /* Avoid trapping reads of pages with a write breakpoint. */
829 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
830 iotlb = PHYS_SECTION_WATCH + paddr;
831 *address |= TLB_MMIO;
832 break;
833 }
834 }
835 }
836
837 return iotlb;
838 }
#endif /* !defined(CONFIG_USER_ONLY) */
840
841 #if !defined(CONFIG_USER_ONLY)
842
/* Subpage helpers, defined later in the file; used by register_subpage(). */
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

/* Allocator used for guest RAM; qemu_anon_ram_alloc unless replaced with
 * phys_mem_set_alloc(). */
static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
848
/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 *
 * @alloc replaces the function stored in phys_mem_alloc.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t))
{
    phys_mem_alloc = alloc;
}
858
859 static uint16_t phys_section_add(PhysPageMap *map,
860 MemoryRegionSection *section)
861 {
862 /* The physical section number is ORed with a page-aligned
863 * pointer to produce the iotlb entries. Thus it should
864 * never overflow into the page-aligned value.
865 */
866 assert(map->sections_nb < TARGET_PAGE_SIZE);
867
868 if (map->sections_nb == map->sections_nb_alloc) {
869 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
870 map->sections = g_renew(MemoryRegionSection, map->sections,
871 map->sections_nb_alloc);
872 }
873 map->sections[map->sections_nb] = *section;
874 memory_region_ref(section->mr);
875 return map->sections_nb++;
876 }
877
878 static void phys_section_destroy(MemoryRegion *mr)
879 {
880 memory_region_unref(mr);
881
882 if (mr->subpage) {
883 subpage_t *subpage = container_of(mr, subpage_t, iomem);
884 memory_region_destroy(&subpage->iomem);
885 g_free(subpage);
886 }
887 }
888
889 static void phys_sections_free(PhysPageMap *map)
890 {
891 while (map->sections_nb > 0) {
892 MemoryRegionSection *section = &map->sections[--map->sections_nb];
893 phys_section_destroy(section->mr);
894 }
895 g_free(map->sections);
896 g_free(map->nodes);
897 }
898
899 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
900 {
901 subpage_t *subpage;
902 hwaddr base = section->offset_within_address_space
903 & TARGET_PAGE_MASK;
904 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
905 d->map.nodes, d->map.sections);
906 MemoryRegionSection subsection = {
907 .offset_within_address_space = base,
908 .size = int128_make64(TARGET_PAGE_SIZE),
909 };
910 hwaddr start, end;
911
912 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
913
914 if (!(existing->mr->subpage)) {
915 subpage = subpage_init(d->as, base);
916 subsection.address_space = d->as;
917 subsection.mr = &subpage->iomem;
918 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
919 phys_section_add(&d->map, &subsection));
920 } else {
921 subpage = container_of(existing->mr, subpage_t, iomem);
922 }
923 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
924 end = start + int128_get64(section->size) - 1;
925 subpage_register(subpage, start, end,
926 phys_section_add(&d->map, section));
927 }
928
929
930 static void register_multipage(AddressSpaceDispatch *d,
931 MemoryRegionSection *section)
932 {
933 hwaddr start_addr = section->offset_within_address_space;
934 uint16_t section_index = phys_section_add(&d->map, section);
935 uint64_t num_pages = int128_get64(int128_rshift(section->size,
936 TARGET_PAGE_BITS));
937
938 assert(num_pages);
939 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
940 }
941
/*
 * Listener callback: add @section to the address space's next dispatch.
 *
 * The section is split into pieces.  An unaligned head, pieces smaller
 * than a page and pieces starting at an unaligned address are registered
 * as subpages; page-aligned runs of whole pages are registered as
 * multipage sections.
 */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    /* 'now' is the piece being registered, 'remain' what is still left
     * (including 'now' until the top of the loop subtracts it). */
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        /* Unaligned start: register the head up to the next page boundary
         * (or the whole section if it ends earlier). */
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        /* Advance past the piece that was just registered. */
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            /* Round the size down to a whole number of pages. */
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
974
/* Flush the coalesced MMIO buffer; only does anything when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
980
/* Acquire the mutex of the global RAM list. */
void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}
985
/* Release the mutex of the global RAM list. */
void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}
990
991 #ifdef __linux__
992
993 #include <sys/vfs.h>
994
995 #define HUGETLBFS_MAGIC 0x958458f6
996
997 static long gethugepagesize(const char *path)
998 {
999 struct statfs fs;
1000 int ret;
1001
1002 do {
1003 ret = statfs(path, &fs);
1004 } while (ret != 0 && errno == EINTR);
1005
1006 if (ret != 0) {
1007 perror(path);
1008 return 0;
1009 }
1010
1011 if (fs.f_type != HUGETLBFS_MAGIC)
1012 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1013
1014 return fs.f_bsize;
1015 }
1016
/*
 * Back @block with a file created under @path and return the mapping.
 *
 * @memory is the requested size; it is rounded up to a multiple of the
 * page size reported by gethugepagesize() before mapping.
 *
 * Returns the mapped area and stores the file descriptor in block->fd.
 * Returns NULL if @memory is smaller than one such page, or on failure;
 * on failure the process exits instead when mem_prealloc is set.
 */
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        goto error;
    }

    /* Too small for a single page: not treated as an error. */
    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        goto error;
    }
    /* Remove the name right away; the open descriptor keeps the file. */
    unlink(filename);
    g_free(filename);

    /* Round up to a multiple of hpagesize. */
    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    /* With preallocation requested, failing to allocate is fatal. */
    if (mem_prealloc) {
        exit(1);
    }
    return NULL;
}
1093 #endif
1094
1095 static ram_addr_t find_ram_offset(ram_addr_t size)
1096 {
1097 RAMBlock *block, *next_block;
1098 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1099
1100 assert(size != 0); /* it would hand out same offset multiple times */
1101
1102 if (QTAILQ_EMPTY(&ram_list.blocks))
1103 return 0;
1104
1105 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1106 ram_addr_t end, next = RAM_ADDR_MAX;
1107
1108 end = block->offset + block->length;
1109
1110 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1111 if (next_block->offset >= end) {
1112 next = MIN(next, next_block->offset);
1113 }
1114 }
1115 if (next - end >= size && next - end < mingap) {
1116 offset = end;
1117 mingap = next - end;
1118 }
1119 }
1120
1121 if (offset == RAM_ADDR_MAX) {
1122 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1123 (uint64_t)size);
1124 abort();
1125 }
1126
1127 return offset;
1128 }
1129
1130 ram_addr_t last_ram_offset(void)
1131 {
1132 RAMBlock *block;
1133 ram_addr_t last = 0;
1134
1135 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1136 last = MAX(last, block->offset + block->length);
1137
1138 return last;
1139 }
1140
1141 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1142 {
1143 int ret;
1144
1145 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1146 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1147 "dump-guest-core", true)) {
1148 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1149 if (ret) {
1150 perror("qemu_madvise");
1151 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1152 "but dump_guest_core=off specified\n");
1153 }
1154 }
1155 }
1156
1157 static RAMBlock *find_ram_block(ram_addr_t addr)
1158 {
1159 RAMBlock *block;
1160
1161 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1162 if (block->offset == addr) {
1163 return block;
1164 }
1165 }
1166
1167 return NULL;
1168 }
1169
1170 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1171 {
1172 RAMBlock *new_block = find_ram_block(addr);
1173 RAMBlock *block;
1174
1175 assert(new_block);
1176 assert(!new_block->idstr[0]);
1177
1178 if (dev) {
1179 char *id = qdev_get_dev_path(dev);
1180 if (id) {
1181 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1182 g_free(id);
1183 }
1184 }
1185 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1186
1187 /* This assumes the iothread lock is taken here too. */
1188 qemu_mutex_lock_ramlist();
1189 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1190 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1191 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1192 new_block->idstr);
1193 abort();
1194 }
1195 }
1196 qemu_mutex_unlock_ramlist();
1197 }
1198
1199 void qemu_ram_unset_idstr(ram_addr_t addr)
1200 {
1201 RAMBlock *block = find_ram_block(addr);
1202
1203 if (block) {
1204 memset(block->idstr, 0, sizeof(block->idstr));
1205 }
1206 }
1207
1208 static int memory_try_enable_merging(void *addr, size_t len)
1209 {
1210 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1211 /* disabled by the user */
1212 return 0;
1213 }
1214
1215 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1216 }
1217
1218 static ram_addr_t ram_block_add(RAMBlock *new_block)
1219 {
1220 RAMBlock *block;
1221 ram_addr_t old_ram_size, new_ram_size;
1222
1223 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1224
1225 /* This assumes the iothread lock is taken here too. */
1226 qemu_mutex_lock_ramlist();
1227 new_block->offset = find_ram_offset(new_block->length);
1228
1229 if (!new_block->host) {
1230 if (xen_enabled()) {
1231 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1232 } else {
1233 new_block->host = phys_mem_alloc(new_block->length);
1234 if (!new_block->host) {
1235 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1236 new_block->mr->name, strerror(errno));
1237 exit(1);
1238 }
1239 memory_try_enable_merging(new_block->host, new_block->length);
1240 }
1241 }
1242
1243 /* Keep the list sorted from biggest to smallest block. */
1244 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1245 if (block->length < new_block->length) {
1246 break;
1247 }
1248 }
1249 if (block) {
1250 QTAILQ_INSERT_BEFORE(block, new_block, next);
1251 } else {
1252 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1253 }
1254 ram_list.mru_block = NULL;
1255
1256 ram_list.version++;
1257 qemu_mutex_unlock_ramlist();
1258
1259 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1260
1261 if (new_ram_size > old_ram_size) {
1262 int i;
1263 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1264 ram_list.dirty_memory[i] =
1265 bitmap_zero_extend(ram_list.dirty_memory[i],
1266 old_ram_size, new_ram_size);
1267 }
1268 }
1269 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1270
1271 qemu_ram_setup_dump(new_block->host, new_block->length);
1272 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1273 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1274
1275 if (kvm_enabled()) {
1276 kvm_setup_guest_memory(new_block->host, new_block->length);
1277 }
1278
1279 return new_block->offset;
1280 }
1281
1282 #ifdef __linux__
1283 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1284 const char *mem_path)
1285 {
1286 RAMBlock *new_block;
1287
1288 if (xen_enabled()) {
1289 fprintf(stderr, "-mem-path not supported with Xen\n");
1290 exit(1);
1291 }
1292
1293 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1294 /*
1295 * file_ram_alloc() needs to allocate just like
1296 * phys_mem_alloc, but we haven't bothered to provide
1297 * a hook there.
1298 */
1299 fprintf(stderr,
1300 "-mem-path not supported with this accelerator\n");
1301 exit(1);
1302 }
1303
1304 size = TARGET_PAGE_ALIGN(size);
1305 new_block = g_malloc0(sizeof(*new_block));
1306 new_block->mr = mr;
1307 new_block->length = size;
1308 new_block->host = file_ram_alloc(new_block, size, mem_path);
1309 return ram_block_add(new_block);
1310 }
1311 #endif
1312
1313 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1314 MemoryRegion *mr)
1315 {
1316 RAMBlock *new_block;
1317
1318 size = TARGET_PAGE_ALIGN(size);
1319 new_block = g_malloc0(sizeof(*new_block));
1320 new_block->mr = mr;
1321 new_block->length = size;
1322 new_block->fd = -1;
1323 new_block->host = host;
1324 if (host) {
1325 new_block->flags |= RAM_PREALLOC;
1326 }
1327 return ram_block_add(new_block);
1328 }
1329
1330 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1331 {
1332 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1333 }
1334
1335 void qemu_ram_free_from_ptr(ram_addr_t addr)
1336 {
1337 RAMBlock *block;
1338
1339 /* This assumes the iothread lock is taken here too. */
1340 qemu_mutex_lock_ramlist();
1341 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1342 if (addr == block->offset) {
1343 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1344 ram_list.mru_block = NULL;
1345 ram_list.version++;
1346 g_free(block);
1347 break;
1348 }
1349 }
1350 qemu_mutex_unlock_ramlist();
1351 }
1352
1353 void qemu_ram_free(ram_addr_t addr)
1354 {
1355 RAMBlock *block;
1356
1357 /* This assumes the iothread lock is taken here too. */
1358 qemu_mutex_lock_ramlist();
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 if (addr == block->offset) {
1361 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1362 ram_list.mru_block = NULL;
1363 ram_list.version++;
1364 if (block->flags & RAM_PREALLOC) {
1365 ;
1366 } else if (xen_enabled()) {
1367 xen_invalidate_map_cache_entry(block->host);
1368 #ifndef _WIN32
1369 } else if (block->fd >= 0) {
1370 munmap(block->host, block->length);
1371 close(block->fd);
1372 #endif
1373 } else {
1374 qemu_anon_ram_free(block->host, block->length);
1375 }
1376 g_free(block);
1377 break;
1378 }
1379 }
1380 qemu_mutex_unlock_ramlist();
1381
1382 }
1383
1384 #ifndef _WIN32
1385 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1386 {
1387 RAMBlock *block;
1388 ram_addr_t offset;
1389 int flags;
1390 void *area, *vaddr;
1391
1392 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1393 offset = addr - block->offset;
1394 if (offset < block->length) {
1395 vaddr = block->host + offset;
1396 if (block->flags & RAM_PREALLOC) {
1397 ;
1398 } else if (xen_enabled()) {
1399 abort();
1400 } else {
1401 flags = MAP_FIXED;
1402 munmap(vaddr, length);
1403 if (block->fd >= 0) {
1404 #ifdef MAP_POPULATE
1405 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1406 MAP_PRIVATE;
1407 #else
1408 flags |= MAP_PRIVATE;
1409 #endif
1410 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1411 flags, block->fd, offset);
1412 } else {
1413 /*
1414 * Remap needs to match alloc. Accelerators that
1415 * set phys_mem_alloc never remap. If they did,
1416 * we'd need a remap hook here.
1417 */
1418 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1419
1420 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1421 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1422 flags, -1, 0);
1423 }
1424 if (area != vaddr) {
1425 fprintf(stderr, "Could not remap addr: "
1426 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1427 length, addr);
1428 exit(1);
1429 }
1430 memory_try_enable_merging(vaddr, length);
1431 qemu_ram_setup_dump(vaddr, length);
1432 }
1433 return;
1434 }
1435 }
1436 }
1437 #endif /* !_WIN32 */
1438
1439 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1440 With the exception of the softmmu code in this file, this should
1441 only be used for local memory (e.g. video ram) that the device owns,
1442 and knows it isn't going to access beyond the end of the block.
1443
1444 It should not be used for general purpose DMA.
1445 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1446 */
1447 void *qemu_get_ram_ptr(ram_addr_t addr)
1448 {
1449 RAMBlock *block = qemu_get_ram_block(addr);
1450
1451 if (xen_enabled()) {
1452 /* We need to check if the requested address is in the RAM
1453 * because we don't want to map the entire memory in QEMU.
1454 * In that case just map until the end of the page.
1455 */
1456 if (block->offset == 0) {
1457 return xen_map_cache(addr, 0, 0);
1458 } else if (block->host == NULL) {
1459 block->host =
1460 xen_map_cache(block->offset, block->length, 1);
1461 }
1462 }
1463 return block->host + (addr - block->offset);
1464 }
1465
1466 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1467 * but takes a size argument */
1468 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1469 {
1470 if (*size == 0) {
1471 return NULL;
1472 }
1473 if (xen_enabled()) {
1474 return xen_map_cache(addr, *size, 1);
1475 } else {
1476 RAMBlock *block;
1477
1478 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1479 if (addr - block->offset < block->length) {
1480 if (addr - block->offset + *size > block->length)
1481 *size = block->length - addr + block->offset;
1482 return block->host + (addr - block->offset);
1483 }
1484 }
1485
1486 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1487 abort();
1488 }
1489 }
1490
1491 /* Some of the softmmu routines need to translate from a host pointer
1492 (typically a TLB entry) back to a ram offset. */
1493 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1494 {
1495 RAMBlock *block;
1496 uint8_t *host = ptr;
1497
1498 if (xen_enabled()) {
1499 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1500 return qemu_get_ram_block(*ram_addr)->mr;
1501 }
1502
1503 block = ram_list.mru_block;
1504 if (block && block->host && host - block->host < block->length) {
1505 goto found;
1506 }
1507
1508 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1509 /* This case append when the block is not mapped. */
1510 if (block->host == NULL) {
1511 continue;
1512 }
1513 if (host - block->host < block->length) {
1514 goto found;
1515 }
1516 }
1517
1518 return NULL;
1519
1520 found:
1521 *ram_addr = block->offset + (host - block->host);
1522 return block->mr;
1523 }
1524
1525 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1526 uint64_t val, unsigned size)
1527 {
1528 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1529 tb_invalidate_phys_page_fast(ram_addr, size);
1530 }
1531 switch (size) {
1532 case 1:
1533 stb_p(qemu_get_ram_ptr(ram_addr), val);
1534 break;
1535 case 2:
1536 stw_p(qemu_get_ram_ptr(ram_addr), val);
1537 break;
1538 case 4:
1539 stl_p(qemu_get_ram_ptr(ram_addr), val);
1540 break;
1541 default:
1542 abort();
1543 }
1544 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1545 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1546 /* we remove the notdirty callback only if the code has been
1547 flushed */
1548 if (!cpu_physical_memory_is_clean(ram_addr)) {
1549 CPUArchState *env = current_cpu->env_ptr;
1550 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1551 }
1552 }
1553
1554 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1555 unsigned size, bool is_write)
1556 {
1557 return is_write;
1558 }
1559
1560 static const MemoryRegionOps notdirty_mem_ops = {
1561 .write = notdirty_mem_write,
1562 .valid.accepts = notdirty_mem_accepts,
1563 .endianness = DEVICE_NATIVE_ENDIAN,
1564 };
1565
1566 /* Generate a debug exception if a watchpoint has been hit. */
1567 static void check_watchpoint(int offset, int len_mask, int flags)
1568 {
1569 CPUState *cpu = current_cpu;
1570 CPUArchState *env = cpu->env_ptr;
1571 target_ulong pc, cs_base;
1572 target_ulong vaddr;
1573 CPUWatchpoint *wp;
1574 int cpu_flags;
1575
1576 if (cpu->watchpoint_hit) {
1577 /* We re-entered the check after replacing the TB. Now raise
1578 * the debug interrupt so that is will trigger after the
1579 * current instruction. */
1580 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1581 return;
1582 }
1583 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1585 if ((vaddr == (wp->vaddr & len_mask) ||
1586 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1587 wp->flags |= BP_WATCHPOINT_HIT;
1588 if (!cpu->watchpoint_hit) {
1589 cpu->watchpoint_hit = wp;
1590 tb_check_watchpoint(cpu);
1591 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1592 cpu->exception_index = EXCP_DEBUG;
1593 cpu_loop_exit(cpu);
1594 } else {
1595 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1596 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1597 cpu_resume_from_signal(cpu, NULL);
1598 }
1599 }
1600 } else {
1601 wp->flags &= ~BP_WATCHPOINT_HIT;
1602 }
1603 }
1604 }
1605
1606 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1607 so these check for a hit then pass through to the normal out-of-line
1608 phys routines. */
1609 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1610 unsigned size)
1611 {
1612 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1613 switch (size) {
1614 case 1: return ldub_phys(&address_space_memory, addr);
1615 case 2: return lduw_phys(&address_space_memory, addr);
1616 case 4: return ldl_phys(&address_space_memory, addr);
1617 default: abort();
1618 }
1619 }
1620
1621 static void watch_mem_write(void *opaque, hwaddr addr,
1622 uint64_t val, unsigned size)
1623 {
1624 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1625 switch (size) {
1626 case 1:
1627 stb_phys(&address_space_memory, addr, val);
1628 break;
1629 case 2:
1630 stw_phys(&address_space_memory, addr, val);
1631 break;
1632 case 4:
1633 stl_phys(&address_space_memory, addr, val);
1634 break;
1635 default: abort();
1636 }
1637 }
1638
1639 static const MemoryRegionOps watch_mem_ops = {
1640 .read = watch_mem_read,
1641 .write = watch_mem_write,
1642 .endianness = DEVICE_NATIVE_ENDIAN,
1643 };
1644
1645 static uint64_t subpage_read(void *opaque, hwaddr addr,
1646 unsigned len)
1647 {
1648 subpage_t *subpage = opaque;
1649 uint8_t buf[4];
1650
1651 #if defined(DEBUG_SUBPAGE)
1652 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1653 subpage, len, addr);
1654 #endif
1655 address_space_read(subpage->as, addr + subpage->base, buf, len);
1656 switch (len) {
1657 case 1:
1658 return ldub_p(buf);
1659 case 2:
1660 return lduw_p(buf);
1661 case 4:
1662 return ldl_p(buf);
1663 default:
1664 abort();
1665 }
1666 }
1667
1668 static void subpage_write(void *opaque, hwaddr addr,
1669 uint64_t value, unsigned len)
1670 {
1671 subpage_t *subpage = opaque;
1672 uint8_t buf[4];
1673
1674 #if defined(DEBUG_SUBPAGE)
1675 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1676 " value %"PRIx64"\n",
1677 __func__, subpage, len, addr, value);
1678 #endif
1679 switch (len) {
1680 case 1:
1681 stb_p(buf, value);
1682 break;
1683 case 2:
1684 stw_p(buf, value);
1685 break;
1686 case 4:
1687 stl_p(buf, value);
1688 break;
1689 default:
1690 abort();
1691 }
1692 address_space_write(subpage->as, addr + subpage->base, buf, len);
1693 }
1694
1695 static bool subpage_accepts(void *opaque, hwaddr addr,
1696 unsigned len, bool is_write)
1697 {
1698 subpage_t *subpage = opaque;
1699 #if defined(DEBUG_SUBPAGE)
1700 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1701 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1702 #endif
1703
1704 return address_space_access_valid(subpage->as, addr + subpage->base,
1705 len, is_write);
1706 }
1707
1708 static const MemoryRegionOps subpage_ops = {
1709 .read = subpage_read,
1710 .write = subpage_write,
1711 .valid.accepts = subpage_accepts,
1712 .endianness = DEVICE_NATIVE_ENDIAN,
1713 };
1714
1715 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1716 uint16_t section)
1717 {
1718 int idx, eidx;
1719
1720 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1721 return -1;
1722 idx = SUBPAGE_IDX(start);
1723 eidx = SUBPAGE_IDX(end);
1724 #if defined(DEBUG_SUBPAGE)
1725 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1726 __func__, mmio, start, end, idx, eidx, section);
1727 #endif
1728 for (; idx <= eidx; idx++) {
1729 mmio->sub_section[idx] = section;
1730 }
1731
1732 return 0;
1733 }
1734
1735 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1736 {
1737 subpage_t *mmio;
1738
1739 mmio = g_malloc0(sizeof(subpage_t));
1740
1741 mmio->as = as;
1742 mmio->base = base;
1743 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1744 "subpage", TARGET_PAGE_SIZE);
1745 mmio->iomem.subpage = true;
1746 #if defined(DEBUG_SUBPAGE)
1747 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1748 mmio, base, TARGET_PAGE_SIZE);
1749 #endif
1750 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1751
1752 return mmio;
1753 }
1754
1755 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1756 MemoryRegion *mr)
1757 {
1758 assert(as);
1759 MemoryRegionSection section = {
1760 .address_space = as,
1761 .mr = mr,
1762 .offset_within_address_space = 0,
1763 .offset_within_region = 0,
1764 .size = int128_2_64(),
1765 };
1766
1767 return phys_section_add(map, &section);
1768 }
1769
1770 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1771 {
1772 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1773 }
1774
1775 static void io_mem_init(void)
1776 {
1777 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1778 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1779 "unassigned", UINT64_MAX);
1780 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1781 "notdirty", UINT64_MAX);
1782 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1783 "watch", UINT64_MAX);
1784 }
1785
1786 static void mem_begin(MemoryListener *listener)
1787 {
1788 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1789 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1790 uint16_t n;
1791
1792 n = dummy_section(&d->map, as, &io_mem_unassigned);
1793 assert(n == PHYS_SECTION_UNASSIGNED);
1794 n = dummy_section(&d->map, as, &io_mem_notdirty);
1795 assert(n == PHYS_SECTION_NOTDIRTY);
1796 n = dummy_section(&d->map, as, &io_mem_rom);
1797 assert(n == PHYS_SECTION_ROM);
1798 n = dummy_section(&d->map, as, &io_mem_watch);
1799 assert(n == PHYS_SECTION_WATCH);
1800
1801 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1802 d->as = as;
1803 as->next_dispatch = d;
1804 }
1805
1806 static void mem_commit(MemoryListener *listener)
1807 {
1808 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1809 AddressSpaceDispatch *cur = as->dispatch;
1810 AddressSpaceDispatch *next = as->next_dispatch;
1811
1812 phys_page_compact_all(next, next->map.nodes_nb);
1813
1814 as->dispatch = next;
1815
1816 if (cur) {
1817 phys_sections_free(&cur->map);
1818 g_free(cur);
1819 }
1820 }
1821
1822 static void tcg_commit(MemoryListener *listener)
1823 {
1824 CPUState *cpu;
1825
1826 /* since each CPU stores ram addresses in its TLB cache, we must
1827 reset the modified entries */
1828 /* XXX: slow ! */
1829 CPU_FOREACH(cpu) {
1830 /* FIXME: Disentangle the cpu.h circular files deps so we can
1831 directly get the right CPU from listener. */
1832 if (cpu->tcg_as_listener != listener) {
1833 continue;
1834 }
1835 tlb_flush(cpu, 1);
1836 }
1837 }
1838
1839 static void core_log_global_start(MemoryListener *listener)
1840 {
1841 cpu_physical_memory_set_dirty_tracking(true);
1842 }
1843
1844 static void core_log_global_stop(MemoryListener *listener)
1845 {
1846 cpu_physical_memory_set_dirty_tracking(false);
1847 }
1848
1849 static MemoryListener core_memory_listener = {
1850 .log_global_start = core_log_global_start,
1851 .log_global_stop = core_log_global_stop,
1852 .priority = 1,
1853 };
1854
1855 void address_space_init_dispatch(AddressSpace *as)
1856 {
1857 as->dispatch = NULL;
1858 as->dispatch_listener = (MemoryListener) {
1859 .begin = mem_begin,
1860 .commit = mem_commit,
1861 .region_add = mem_add,
1862 .region_nop = mem_add,
1863 .priority = 0,
1864 };
1865 memory_listener_register(&as->dispatch_listener, as);
1866 }
1867
1868 void address_space_destroy_dispatch(AddressSpace *as)
1869 {
1870 AddressSpaceDispatch *d = as->dispatch;
1871
1872 memory_listener_unregister(&as->dispatch_listener);
1873 g_free(d);
1874 as->dispatch = NULL;
1875 }
1876
1877 static void memory_map_init(void)
1878 {
1879 system_memory = g_malloc(sizeof(*system_memory));
1880
1881 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1882 address_space_init(&address_space_memory, system_memory, "memory");
1883
1884 system_io = g_malloc(sizeof(*system_io));
1885 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1886 65536);
1887 address_space_init(&address_space_io, system_io, "I/O");
1888
1889 memory_listener_register(&core_memory_listener, &address_space_memory);
1890 }
1891
1892 MemoryRegion *get_system_memory(void)
1893 {
1894 return system_memory;
1895 }
1896
1897 MemoryRegion *get_system_io(void)
1898 {
1899 return system_io;
1900 }
1901
1902 #endif /* !defined(CONFIG_USER_ONLY) */
1903
1904 /* physical memory access (slow version, mainly for debug) */
1905 #if defined(CONFIG_USER_ONLY)
1906 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1907 uint8_t *buf, int len, int is_write)
1908 {
1909 int l, flags;
1910 target_ulong page;
1911 void * p;
1912
1913 while (len > 0) {
1914 page = addr & TARGET_PAGE_MASK;
1915 l = (page + TARGET_PAGE_SIZE) - addr;
1916 if (l > len)
1917 l = len;
1918 flags = page_get_flags(page);
1919 if (!(flags & PAGE_VALID))
1920 return -1;
1921 if (is_write) {
1922 if (!(flags & PAGE_WRITE))
1923 return -1;
1924 /* XXX: this code should not depend on lock_user */
1925 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1926 return -1;
1927 memcpy(p, buf, l);
1928 unlock_user(p, addr, l);
1929 } else {
1930 if (!(flags & PAGE_READ))
1931 return -1;
1932 /* XXX: this code should not depend on lock_user */
1933 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1934 return -1;
1935 memcpy(buf, p, l);
1936 unlock_user(p, addr, 0);
1937 }
1938 len -= l;
1939 buf += l;
1940 addr += l;
1941 }
1942 return 0;
1943 }
1944
1945 #else
1946
1947 static void invalidate_and_set_dirty(hwaddr addr,
1948 hwaddr length)
1949 {
1950 if (cpu_physical_memory_is_clean(addr)) {
1951 /* invalidate code */
1952 tb_invalidate_phys_page_range(addr, addr + length, 0);
1953 /* set dirty bit */
1954 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1955 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1956 }
1957 xen_modified_memory(addr, length);
1958 }
1959
1960 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1961 {
1962 unsigned access_size_max = mr->ops->valid.max_access_size;
1963
1964 /* Regions are assumed to support 1-4 byte accesses unless
1965 otherwise specified. */
1966 if (access_size_max == 0) {
1967 access_size_max = 4;
1968 }
1969
1970 /* Bound the maximum access by the alignment of the address. */
1971 if (!mr->ops->impl.unaligned) {
1972 unsigned align_size_max = addr & -addr;
1973 if (align_size_max != 0 && align_size_max < access_size_max) {
1974 access_size_max = align_size_max;
1975 }
1976 }
1977
1978 /* Don't attempt accesses larger than the maximum. */
1979 if (l > access_size_max) {
1980 l = access_size_max;
1981 }
1982 if (l & (l - 1)) {
1983 l = 1 << (qemu_fls(l) - 1);
1984 }
1985
1986 return l;
1987 }
1988
1989 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1990 int len, bool is_write)
1991 {
1992 hwaddr l;
1993 uint8_t *ptr;
1994 uint64_t val;
1995 hwaddr addr1;
1996 MemoryRegion *mr;
1997 bool error = false;
1998
1999 while (len > 0) {
2000 l = len;
2001 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2002
2003 if (is_write) {
2004 if (!memory_access_is_direct(mr, is_write)) {
2005 l = memory_access_size(mr, l, addr1);
2006 /* XXX: could force current_cpu to NULL to avoid
2007 potential bugs */
2008 switch (l) {
2009 case 8:
2010 /* 64 bit write access */
2011 val = ldq_p(buf);
2012 error |= io_mem_write(mr, addr1, val, 8);
2013 break;
2014 case 4:
2015 /* 32 bit write access */
2016 val = ldl_p(buf);
2017 error |= io_mem_write(mr, addr1, val, 4);
2018 break;
2019 case 2:
2020 /* 16 bit write access */
2021 val = lduw_p(buf);
2022 error |= io_mem_write(mr, addr1, val, 2);
2023 break;
2024 case 1:
2025 /* 8 bit write access */
2026 val = ldub_p(buf);
2027 error |= io_mem_write(mr, addr1, val, 1);
2028 break;
2029 default:
2030 abort();
2031 }
2032 } else {
2033 addr1 += memory_region_get_ram_addr(mr);
2034 /* RAM case */
2035 ptr = qemu_get_ram_ptr(addr1);
2036 memcpy(ptr, buf, l);
2037 invalidate_and_set_dirty(addr1, l);
2038 }
2039 } else {
2040 if (!memory_access_is_direct(mr, is_write)) {
2041 /* I/O case */
2042 l = memory_access_size(mr, l, addr1);
2043 switch (l) {
2044 case 8:
2045 /* 64 bit read access */
2046 error |= io_mem_read(mr, addr1, &val, 8);
2047 stq_p(buf, val);
2048 break;
2049 case 4:
2050 /* 32 bit read access */
2051 error |= io_mem_read(mr, addr1, &val, 4);
2052 stl_p(buf, val);
2053 break;
2054 case 2:
2055 /* 16 bit read access */
2056 error |= io_mem_read(mr, addr1, &val, 2);
2057 stw_p(buf, val);
2058 break;
2059 case 1:
2060 /* 8 bit read access */
2061 error |= io_mem_read(mr, addr1, &val, 1);
2062 stb_p(buf, val);
2063 break;
2064 default:
2065 abort();
2066 }
2067 } else {
2068 /* RAM case */
2069 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2070 memcpy(buf, ptr, l);
2071 }
2072 }
2073 len -= l;
2074 buf += l;
2075 addr += l;
2076 }
2077
2078 return error;
2079 }
2080
2081 bool address_space_write(AddressSpace *as, hwaddr addr,
2082 const uint8_t *buf, int len)
2083 {
2084 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2085 }
2086
2087 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2088 {
2089 return address_space_rw(as, addr, buf, len, false);
2090 }
2091
2092
2093 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2094 int len, int is_write)
2095 {
2096 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2097 }
2098
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2103
2104 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2105 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2106 {
2107 hwaddr l;
2108 uint8_t *ptr;
2109 hwaddr addr1;
2110 MemoryRegion *mr;
2111
2112 while (len > 0) {
2113 l = len;
2114 mr = address_space_translate(as, addr, &addr1, &l, true);
2115
2116 if (!(memory_region_is_ram(mr) ||
2117 memory_region_is_romd(mr))) {
2118 /* do nothing */
2119 } else {
2120 addr1 += memory_region_get_ram_addr(mr);
2121 /* ROM/RAM case */
2122 ptr = qemu_get_ram_ptr(addr1);
2123 switch (type) {
2124 case WRITE_DATA:
2125 memcpy(ptr, buf, l);
2126 invalidate_and_set_dirty(addr1, l);
2127 break;
2128 case FLUSH_CACHE:
2129 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2130 break;
2131 }
2132 }
2133 len -= l;
2134 buf += l;
2135 addr += l;
2136 }
2137 }
2138
2139 /* used for ROM loading : can write in RAM and ROM */
2140 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2141 const uint8_t *buf, int len)
2142 {
2143 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2144 }
2145
2146 void cpu_flush_icache_range(hwaddr start, int len)
2147 {
2148 /*
2149 * This function should do the same thing as an icache flush that was
2150 * triggered from within the guest. For TCG we are always cache coherent,
2151 * so there is no need to flush anything. For KVM / Xen we need to flush
2152 * the host's instruction cache at least.
2153 */
2154 if (tcg_enabled()) {
2155 return;
2156 }
2157
2158 cpu_physical_memory_write_rom_internal(&address_space_memory,
2159 start, NULL, len, FLUSH_CACHE);
2160 }
2161
2162 typedef struct {
2163 MemoryRegion *mr;
2164 void *buffer;
2165 hwaddr addr;
2166 hwaddr len;
2167 } BounceBuffer;
2168
2169 static BounceBuffer bounce;
2170
2171 typedef struct MapClient {
2172 void *opaque;
2173 void (*callback)(void *opaque);
2174 QLIST_ENTRY(MapClient) link;
2175 } MapClient;
2176
2177 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2178 = QLIST_HEAD_INITIALIZER(map_client_list);
2179
2180 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2181 {
2182 MapClient *client = g_malloc(sizeof(*client));
2183
2184 client->opaque = opaque;
2185 client->callback = callback;
2186 QLIST_INSERT_HEAD(&map_client_list, client, link);
2187 return client;
2188 }
2189
2190 static void cpu_unregister_map_client(void *_client)
2191 {
2192 MapClient *client = (MapClient *)_client;
2193
2194 QLIST_REMOVE(client, link);
2195 g_free(client);
2196 }
2197
2198 static void cpu_notify_map_clients(void)
2199 {
2200 MapClient *client;
2201
2202 while (!QLIST_EMPTY(&map_client_list)) {
2203 client = QLIST_FIRST(&map_client_list);
2204 client->callback(client->opaque);
2205 cpu_unregister_map_client(client);
2206 }
2207 }
2208
2209 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2210 {
2211 MemoryRegion *mr;
2212 hwaddr l, xlat;
2213
2214 while (len > 0) {
2215 l = len;
2216 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2217 if (!memory_access_is_direct(mr, is_write)) {
2218 l = memory_access_size(mr, l, addr);
2219 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2220 return false;
2221 }
2222 }
2223
2224 len -= l;
2225 addr += l;
2226 }
2227 return true;
2228 }
2229
2230 /* Map a physical memory region into a host virtual address.
2231 * May map a subset of the requested range, given by and returned in *plen.
2232 * May return NULL if resources needed to perform the mapping are exhausted.
2233 * Use only for reads OR writes - not for read-modify-write operations.
2234 * Use cpu_register_map_client() to know when retrying the map operation is
2235 * likely to succeed.
2236 */
2237 void *address_space_map(AddressSpace *as,
2238 hwaddr addr,
2239 hwaddr *plen,
2240 bool is_write)
2241 {
2242 hwaddr len = *plen;
2243 hwaddr done = 0;
2244 hwaddr l, xlat, base;
2245 MemoryRegion *mr, *this_mr;
2246 ram_addr_t raddr;
2247
2248 if (len == 0) {
2249 return NULL;
2250 }
2251
2252 l = len;
2253 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2254 if (!memory_access_is_direct(mr, is_write)) {
2255 if (bounce.buffer) {
2256 return NULL;
2257 }
2258 /* Avoid unbounded allocations */
2259 l = MIN(l, TARGET_PAGE_SIZE);
2260 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2261 bounce.addr = addr;
2262 bounce.len = l;
2263
2264 memory_region_ref(mr);
2265 bounce.mr = mr;
2266 if (!is_write) {
2267 address_space_read(as, addr, bounce.buffer, l);
2268 }
2269
2270 *plen = l;
2271 return bounce.buffer;
2272 }
2273
2274 base = xlat;
2275 raddr = memory_region_get_ram_addr(mr);
2276
2277 for (;;) {
2278 len -= l;
2279 addr += l;
2280 done += l;
2281 if (len == 0) {
2282 break;
2283 }
2284
2285 l = len;
2286 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2287 if (this_mr != mr || xlat != base + done) {
2288 break;
2289 }
2290 }
2291
2292 memory_region_ref(mr);
2293 *plen = done;
2294 return qemu_ram_ptr_length(raddr + base, plen);
2295 }
2296
2297 /* Unmaps a memory region previously mapped by address_space_map().
2298 * Will also mark the memory as dirty if is_write == 1. access_len gives
2299 * the amount of memory that was actually read or written by the caller.
2300 */
2301 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2302 int is_write, hwaddr access_len)
2303 {
2304 if (buffer != bounce.buffer) {
2305 MemoryRegion *mr;
2306 ram_addr_t addr1;
2307
2308 mr = qemu_ram_addr_from_host(buffer, &addr1);
2309 assert(mr != NULL);
2310 if (is_write) {
2311 while (access_len) {
2312 unsigned l;
2313 l = TARGET_PAGE_SIZE;
2314 if (l > access_len)
2315 l = access_len;
2316 invalidate_and_set_dirty(addr1, l);
2317 addr1 += l;
2318 access_len -= l;
2319 }
2320 }
2321 if (xen_enabled()) {
2322 xen_invalidate_map_cache_entry(buffer);
2323 }
2324 memory_region_unref(mr);
2325 return;
2326 }
2327 if (is_write) {
2328 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2329 }
2330 qemu_vfree(bounce.buffer);
2331 bounce.buffer = NULL;
2332 memory_region_unref(bounce.mr);
2333 cpu_notify_map_clients();
2334 }
2335
2336 void *cpu_physical_memory_map(hwaddr addr,
2337 hwaddr *plen,
2338 int is_write)
2339 {
2340 return address_space_map(&address_space_memory, addr, plen, is_write);
2341 }
2342
2343 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2344 int is_write, hwaddr access_len)
2345 {
2346 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2347 }
2348
2349 /* warning: addr must be aligned */
2350 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2351 enum device_endian endian)
2352 {
2353 uint8_t *ptr;
2354 uint64_t val;
2355 MemoryRegion *mr;
2356 hwaddr l = 4;
2357 hwaddr addr1;
2358
2359 mr = address_space_translate(as, addr, &addr1, &l, false);
2360 if (l < 4 || !memory_access_is_direct(mr, false)) {
2361 /* I/O case */
2362 io_mem_read(mr, addr1, &val, 4);
2363 #if defined(TARGET_WORDS_BIGENDIAN)
2364 if (endian == DEVICE_LITTLE_ENDIAN) {
2365 val = bswap32(val);
2366 }
2367 #else
2368 if (endian == DEVICE_BIG_ENDIAN) {
2369 val = bswap32(val);
2370 }
2371 #endif
2372 } else {
2373 /* RAM case */
2374 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2375 & TARGET_PAGE_MASK)
2376 + addr1);
2377 switch (endian) {
2378 case DEVICE_LITTLE_ENDIAN:
2379 val = ldl_le_p(ptr);
2380 break;
2381 case DEVICE_BIG_ENDIAN:
2382 val = ldl_be_p(ptr);
2383 break;
2384 default:
2385 val = ldl_p(ptr);
2386 break;
2387 }
2388 }
2389 return val;
2390 }
2391
2392 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2393 {
2394 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2395 }
2396
2397 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2398 {
2399 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2400 }
2401
2402 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2403 {
2404 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2405 }
2406
2407 /* warning: addr must be aligned */
2408 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2409 enum device_endian endian)
2410 {
2411 uint8_t *ptr;
2412 uint64_t val;
2413 MemoryRegion *mr;
2414 hwaddr l = 8;
2415 hwaddr addr1;
2416
2417 mr = address_space_translate(as, addr, &addr1, &l,
2418 false);
2419 if (l < 8 || !memory_access_is_direct(mr, false)) {
2420 /* I/O case */
2421 io_mem_read(mr, addr1, &val, 8);
2422 #if defined(TARGET_WORDS_BIGENDIAN)
2423 if (endian == DEVICE_LITTLE_ENDIAN) {
2424 val = bswap64(val);
2425 }
2426 #else
2427 if (endian == DEVICE_BIG_ENDIAN) {
2428 val = bswap64(val);
2429 }
2430 #endif
2431 } else {
2432 /* RAM case */
2433 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2434 & TARGET_PAGE_MASK)
2435 + addr1);
2436 switch (endian) {
2437 case DEVICE_LITTLE_ENDIAN:
2438 val = ldq_le_p(ptr);
2439 break;
2440 case DEVICE_BIG_ENDIAN:
2441 val = ldq_be_p(ptr);
2442 break;
2443 default:
2444 val = ldq_p(ptr);
2445 break;
2446 }
2447 }
2448 return val;
2449 }
2450
2451 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2452 {
2453 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2454 }
2455
2456 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2457 {
2458 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2459 }
2460
2461 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2462 {
2463 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2464 }
2465
2466 /* XXX: optimize */
2467 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2468 {
2469 uint8_t val;
2470 address_space_rw(as, addr, &val, 1, 0);
2471 return val;
2472 }
2473
2474 /* warning: addr must be aligned */
2475 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2476 enum device_endian endian)
2477 {
2478 uint8_t *ptr;
2479 uint64_t val;
2480 MemoryRegion *mr;
2481 hwaddr l = 2;
2482 hwaddr addr1;
2483
2484 mr = address_space_translate(as, addr, &addr1, &l,
2485 false);
2486 if (l < 2 || !memory_access_is_direct(mr, false)) {
2487 /* I/O case */
2488 io_mem_read(mr, addr1, &val, 2);
2489 #if defined(TARGET_WORDS_BIGENDIAN)
2490 if (endian == DEVICE_LITTLE_ENDIAN) {
2491 val = bswap16(val);
2492 }
2493 #else
2494 if (endian == DEVICE_BIG_ENDIAN) {
2495 val = bswap16(val);
2496 }
2497 #endif
2498 } else {
2499 /* RAM case */
2500 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2501 & TARGET_PAGE_MASK)
2502 + addr1);
2503 switch (endian) {
2504 case DEVICE_LITTLE_ENDIAN:
2505 val = lduw_le_p(ptr);
2506 break;
2507 case DEVICE_BIG_ENDIAN:
2508 val = lduw_be_p(ptr);
2509 break;
2510 default:
2511 val = lduw_p(ptr);
2512 break;
2513 }
2514 }
2515 return val;
2516 }
2517
2518 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2519 {
2520 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2521 }
2522
2523 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2524 {
2525 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2526 }
2527
2528 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2529 {
2530 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2531 }
2532
2533 /* warning: addr must be aligned. The ram page is not masked as dirty
2534 and the code inside is not invalidated. It is useful if the dirty
2535 bits are used to track modified PTEs */
2536 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2537 {
2538 uint8_t *ptr;
2539 MemoryRegion *mr;
2540 hwaddr l = 4;
2541 hwaddr addr1;
2542
2543 mr = address_space_translate(as, addr, &addr1, &l,
2544 true);
2545 if (l < 4 || !memory_access_is_direct(mr, true)) {
2546 io_mem_write(mr, addr1, val, 4);
2547 } else {
2548 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2549 ptr = qemu_get_ram_ptr(addr1);
2550 stl_p(ptr, val);
2551
2552 if (unlikely(in_migration)) {
2553 if (cpu_physical_memory_is_clean(addr1)) {
2554 /* invalidate code */
2555 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2556 /* set dirty bit */
2557 cpu_physical_memory_set_dirty_flag(addr1,
2558 DIRTY_MEMORY_MIGRATION);
2559 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2560 }
2561 }
2562 }
2563 }
2564
2565 /* warning: addr must be aligned */
2566 static inline void stl_phys_internal(AddressSpace *as,
2567 hwaddr addr, uint32_t val,
2568 enum device_endian endian)
2569 {
2570 uint8_t *ptr;
2571 MemoryRegion *mr;
2572 hwaddr l = 4;
2573 hwaddr addr1;
2574
2575 mr = address_space_translate(as, addr, &addr1, &l,
2576 true);
2577 if (l < 4 || !memory_access_is_direct(mr, true)) {
2578 #if defined(TARGET_WORDS_BIGENDIAN)
2579 if (endian == DEVICE_LITTLE_ENDIAN) {
2580 val = bswap32(val);
2581 }
2582 #else
2583 if (endian == DEVICE_BIG_ENDIAN) {
2584 val = bswap32(val);
2585 }
2586 #endif
2587 io_mem_write(mr, addr1, val, 4);
2588 } else {
2589 /* RAM case */
2590 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2591 ptr = qemu_get_ram_ptr(addr1);
2592 switch (endian) {
2593 case DEVICE_LITTLE_ENDIAN:
2594 stl_le_p(ptr, val);
2595 break;
2596 case DEVICE_BIG_ENDIAN:
2597 stl_be_p(ptr, val);
2598 break;
2599 default:
2600 stl_p(ptr, val);
2601 break;
2602 }
2603 invalidate_and_set_dirty(addr1, 4);
2604 }
2605 }
2606
2607 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2608 {
2609 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2610 }
2611
2612 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2613 {
2614 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2615 }
2616
2617 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2618 {
2619 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2620 }
2621
2622 /* XXX: optimize */
2623 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2624 {
2625 uint8_t v = val;
2626 address_space_rw(as, addr, &v, 1, 1);
2627 }
2628
2629 /* warning: addr must be aligned */
2630 static inline void stw_phys_internal(AddressSpace *as,
2631 hwaddr addr, uint32_t val,
2632 enum device_endian endian)
2633 {
2634 uint8_t *ptr;
2635 MemoryRegion *mr;
2636 hwaddr l = 2;
2637 hwaddr addr1;
2638
2639 mr = address_space_translate(as, addr, &addr1, &l, true);
2640 if (l < 2 || !memory_access_is_direct(mr, true)) {
2641 #if defined(TARGET_WORDS_BIGENDIAN)
2642 if (endian == DEVICE_LITTLE_ENDIAN) {
2643 val = bswap16(val);
2644 }
2645 #else
2646 if (endian == DEVICE_BIG_ENDIAN) {
2647 val = bswap16(val);
2648 }
2649 #endif
2650 io_mem_write(mr, addr1, val, 2);
2651 } else {
2652 /* RAM case */
2653 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2654 ptr = qemu_get_ram_ptr(addr1);
2655 switch (endian) {
2656 case DEVICE_LITTLE_ENDIAN:
2657 stw_le_p(ptr, val);
2658 break;
2659 case DEVICE_BIG_ENDIAN:
2660 stw_be_p(ptr, val);
2661 break;
2662 default:
2663 stw_p(ptr, val);
2664 break;
2665 }
2666 invalidate_and_set_dirty(addr1, 2);
2667 }
2668 }
2669
2670 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2671 {
2672 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2673 }
2674
2675 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2676 {
2677 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2678 }
2679
2680 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2681 {
2682 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2683 }
2684
2685 /* XXX: optimize */
2686 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2687 {
2688 val = tswap64(val);
2689 address_space_rw(as, addr, (void *) &val, 8, 1);
2690 }
2691
2692 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2693 {
2694 val = cpu_to_le64(val);
2695 address_space_rw(as, addr, (void *) &val, 8, 1);
2696 }
2697
2698 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2699 {
2700 val = cpu_to_be64(val);
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2702 }
2703
2704 /* virtual memory access for debug (includes writing to ROM) */
2705 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2706 uint8_t *buf, int len, int is_write)
2707 {
2708 int l;
2709 hwaddr phys_addr;
2710 target_ulong page;
2711
2712 while (len > 0) {
2713 page = addr & TARGET_PAGE_MASK;
2714 phys_addr = cpu_get_phys_page_debug(cpu, page);
2715 /* if no physical page mapped, return an error */
2716 if (phys_addr == -1)
2717 return -1;
2718 l = (page + TARGET_PAGE_SIZE) - addr;
2719 if (l > len)
2720 l = len;
2721 phys_addr += (addr & ~TARGET_PAGE_MASK);
2722 if (is_write) {
2723 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2724 } else {
2725 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2726 }
2727 len -= l;
2728 buf += l;
2729 addr += l;
2730 }
2731 return 0;
2732 }
2733 #endif
2734
2735 #if !defined(CONFIG_USER_ONLY)
2736
/*
 * Helper for the _utterly broken_ virtio device model: reports whether
 * this build targets a big endian machine, i.e. whether
 * TARGET_WORDS_BIGENDIAN is defined.  Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
2750
2751 #endif
2752
2753 #ifndef CONFIG_USER_ONLY
2754 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2755 {
2756 MemoryRegion*mr;
2757 hwaddr l = 1;
2758
2759 mr = address_space_translate(&address_space_memory,
2760 phys_addr, &phys_addr, &l, false);
2761
2762 return !(memory_region_is_ram(mr) ||
2763 memory_region_is_romd(mr));
2764 }
2765
2766 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2767 {
2768 RAMBlock *block;
2769
2770 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2771 func(block->host, block->offset, block->length, opaque);
2772 }
2773 }
2774 #endif