hostmem: allow preallocation of any memory region
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
/* Written by cpu_physical_memory_set_dirty_tracking(). */
static bool in_migration;

/* Global list of RAM blocks; its mutex is initialised in cpu_exec_init_all(). */
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

/* Special regions; compared by address in memory_region_is_unassigned()
 * and register_subpage(). */
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC (1 << 0)
75
76 #endif
77
/* List of all CPUs; cpu_exec_init() appends to it. */
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
86
87 #if !defined(CONFIG_USER_ONLY)
88
/* One 32-bit slot of the physical page radix tree: either a leaf that
 * names a section or an inner entry that names a child node. */
typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};
97
/* All-ones value of the 26-bit ptr field: "no node allocated here". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

/* Each tree level resolves P_L2_BITS bits of the page index. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Number of levels needed to cover ADDR_SPACE_BITS - TARGET_PAGE_BITS bits,
 * rounded up. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* One tree node: a table of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
109
/* Growable storage for the tree nodes and the section table. */
typedef struct PhysPageMap {
    /* Number of sections in use / allocated (see phys_section_add()). */
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    /* Number of nodes in use / allocated (see phys_map_node_reserve()). */
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;
118
/* Lookup state of one address space: tree root, its backing storage and
 * a back pointer to the address space. */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};
127
/* Byte offset of @addr inside its target page. */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A page split between several sections: sub_section[] holds one section
 * index per byte offset, looked up with SUBPAGE_IDX(). */
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
135
/* Fixed section indices; memory_region_section_get_iotlb() folds the
 * NOTDIRTY, ROM and WATCH values into iotlb entries. */
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

/* Defined later in this file. */
static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
146 #endif
147
148 #if !defined(CONFIG_USER_ONLY)
149
150 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
151 {
152 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
153 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
154 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
155 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 }
157 }
158
159 static uint32_t phys_map_node_alloc(PhysPageMap *map)
160 {
161 unsigned i;
162 uint32_t ret;
163
164 ret = map->nodes_nb++;
165 assert(ret != PHYS_MAP_NODE_NIL);
166 assert(ret != map->nodes_nb_alloc);
167 for (i = 0; i < P_L2_SIZE; ++i) {
168 map->nodes[ret][i].skip = 1;
169 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
170 }
171 return ret;
172 }
173
174 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
175 hwaddr *index, hwaddr *nb, uint16_t leaf,
176 int level)
177 {
178 PhysPageEntry *p;
179 int i;
180 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
181
182 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
183 lp->ptr = phys_map_node_alloc(map);
184 p = map->nodes[lp->ptr];
185 if (level == 0) {
186 for (i = 0; i < P_L2_SIZE; i++) {
187 p[i].skip = 0;
188 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 }
190 }
191 } else {
192 p = map->nodes[lp->ptr];
193 }
194 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
195
196 while (*nb && lp < &p[P_L2_SIZE]) {
197 if ((*index & (step - 1)) == 0 && *nb >= step) {
198 lp->skip = 0;
199 lp->ptr = leaf;
200 *index += step;
201 *nb -= step;
202 } else {
203 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
204 }
205 ++lp;
206 }
207 }
208
209 static void phys_page_set(AddressSpaceDispatch *d,
210 hwaddr index, hwaddr nb,
211 uint16_t leaf)
212 {
213 /* Wildly overreserve - it doesn't matter much. */
214 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
215
216 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
217 }
218
219 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
220 * and update our entry so we can skip it and go directly to the destination.
221 */
222 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
223 {
224 unsigned valid_ptr = P_L2_SIZE;
225 int valid = 0;
226 PhysPageEntry *p;
227 int i;
228
229 if (lp->ptr == PHYS_MAP_NODE_NIL) {
230 return;
231 }
232
233 p = nodes[lp->ptr];
234 for (i = 0; i < P_L2_SIZE; i++) {
235 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
236 continue;
237 }
238
239 valid_ptr = i;
240 valid++;
241 if (p[i].skip) {
242 phys_page_compact(&p[i], nodes, compacted);
243 }
244 }
245
246 /* We can only compress if there's only one child. */
247 if (valid != 1) {
248 return;
249 }
250
251 assert(valid_ptr < P_L2_SIZE);
252
253 /* Don't compress if it won't fit in the # of bits we have. */
254 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 return;
256 }
257
258 lp->ptr = p[valid_ptr].ptr;
259 if (!p[valid_ptr].skip) {
260 /* If our only child is a leaf, make this a leaf. */
261 /* By design, we should have made this node a leaf to begin with so we
262 * should never reach here.
263 * But since it's so simple to handle this, let's do it just in case we
264 * change this rule.
265 */
266 lp->skip = 0;
267 } else {
268 lp->skip += p[valid_ptr].skip;
269 }
270 }
271
272 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
273 {
274 DECLARE_BITMAP(compacted, nodes_nb);
275
276 if (d->phys_map.skip) {
277 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 }
279 }
280
281 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
282 Node *nodes, MemoryRegionSection *sections)
283 {
284 PhysPageEntry *p;
285 hwaddr index = addr >> TARGET_PAGE_BITS;
286 int i;
287
288 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
289 if (lp.ptr == PHYS_MAP_NODE_NIL) {
290 return &sections[PHYS_SECTION_UNASSIGNED];
291 }
292 p = nodes[lp.ptr];
293 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
294 }
295
296 if (sections[lp.ptr].size.hi ||
297 range_covers_byte(sections[lp.ptr].offset_within_address_space,
298 sections[lp.ptr].size.lo, addr)) {
299 return &sections[lp.ptr];
300 } else {
301 return &sections[PHYS_SECTION_UNASSIGNED];
302 }
303 }
304
305 bool memory_region_is_unassigned(MemoryRegion *mr)
306 {
307 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
308 && mr != &io_mem_watch;
309 }
310
311 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
312 hwaddr addr,
313 bool resolve_subpage)
314 {
315 MemoryRegionSection *section;
316 subpage_t *subpage;
317
318 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
319 if (resolve_subpage && section->mr->subpage) {
320 subpage = container_of(section->mr, subpage_t, iomem);
321 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
322 }
323 return section;
324 }
325
326 static MemoryRegionSection *
327 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
328 hwaddr *plen, bool resolve_subpage)
329 {
330 MemoryRegionSection *section;
331 Int128 diff;
332
333 section = address_space_lookup_region(d, addr, resolve_subpage);
334 /* Compute offset within MemoryRegionSection */
335 addr -= section->offset_within_address_space;
336
337 /* Compute offset within MemoryRegion */
338 *xlat = addr + section->offset_within_region;
339
340 diff = int128_sub(section->mr->size, int128_make64(addr));
341 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 return section;
343 }
344
345 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
346 {
347 if (memory_region_is_ram(mr)) {
348 return !(is_write && mr->readonly);
349 }
350 if (memory_region_is_romd(mr)) {
351 return !is_write;
352 }
353
354 return false;
355 }
356
357 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
358 hwaddr *xlat, hwaddr *plen,
359 bool is_write)
360 {
361 IOMMUTLBEntry iotlb;
362 MemoryRegionSection *section;
363 MemoryRegion *mr;
364 hwaddr len = *plen;
365
366 for (;;) {
367 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
368 mr = section->mr;
369
370 if (!mr->iommu_ops) {
371 break;
372 }
373
374 iotlb = mr->iommu_ops->translate(mr, addr);
375 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
376 | (addr & iotlb.addr_mask));
377 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
378 if (!(iotlb.perm & (1 << is_write))) {
379 mr = &io_mem_unassigned;
380 break;
381 }
382
383 as = iotlb.target_as;
384 }
385
386 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
387 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
388 len = MIN(page, len);
389 }
390
391 *plen = len;
392 *xlat = addr;
393 return mr;
394 }
395
396 MemoryRegionSection *
397 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
398 hwaddr *plen)
399 {
400 MemoryRegionSection *section;
401 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
402
403 assert(!section->mr->iommu_ops);
404 return section;
405 }
406 #endif
407
408 void cpu_exec_init_all(void)
409 {
410 #if !defined(CONFIG_USER_ONLY)
411 qemu_mutex_init(&ram_list.mutex);
412 memory_map_init();
413 io_mem_init();
414 #endif
415 }
416
417 #if !defined(CONFIG_USER_ONLY)
418
419 static int cpu_common_post_load(void *opaque, int version_id)
420 {
421 CPUState *cpu = opaque;
422
423 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
424 version_id is increased. */
425 cpu->interrupt_request &= ~0x01;
426 tlb_flush(cpu, 1);
427
428 return 0;
429 }
430
/* Migration description of the CPUState fields common to all targets.
 * cpu_exec_init() registers it when the device has no vmsd of its own. */
const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
442
443 #endif
444
445 CPUState *qemu_get_cpu(int index)
446 {
447 CPUState *cpu;
448
449 CPU_FOREACH(cpu) {
450 if (cpu->cpu_index == index) {
451 return cpu;
452 }
453 }
454
455 return NULL;
456 }
457
458 #if !defined(CONFIG_USER_ONLY)
459 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
460 {
461 /* We only support one address space per cpu at the moment. */
462 assert(cpu->as == as);
463
464 if (cpu->tcg_as_listener) {
465 memory_listener_unregister(cpu->tcg_as_listener);
466 } else {
467 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
468 }
469 cpu->tcg_as_listener->commit = tcg_commit;
470 memory_listener_register(cpu->tcg_as_listener, as);
471 }
472 #endif
473
474 void cpu_exec_init(CPUArchState *env)
475 {
476 CPUState *cpu = ENV_GET_CPU(env);
477 CPUClass *cc = CPU_GET_CLASS(cpu);
478 CPUState *some_cpu;
479 int cpu_index;
480
481 #if defined(CONFIG_USER_ONLY)
482 cpu_list_lock();
483 #endif
484 cpu_index = 0;
485 CPU_FOREACH(some_cpu) {
486 cpu_index++;
487 }
488 cpu->cpu_index = cpu_index;
489 cpu->numa_node = 0;
490 QTAILQ_INIT(&cpu->breakpoints);
491 QTAILQ_INIT(&cpu->watchpoints);
492 #ifndef CONFIG_USER_ONLY
493 cpu->as = &address_space_memory;
494 cpu->thread_id = qemu_get_thread_id();
495 #endif
496 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
497 #if defined(CONFIG_USER_ONLY)
498 cpu_list_unlock();
499 #endif
500 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
501 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
502 }
503 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
504 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
505 cpu_save, cpu_load, env);
506 assert(cc->vmsd == NULL);
507 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
508 #endif
509 if (cc->vmsd != NULL) {
510 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
511 }
512 }
513
514 #if defined(TARGET_HAS_ICE)
515 #if defined(CONFIG_USER_ONLY)
516 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
517 {
518 tb_invalidate_phys_page_range(pc, pc + 1, 0);
519 }
520 #else
521 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
522 {
523 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
524 if (phys != -1) {
525 tb_invalidate_phys_addr(cpu->as,
526 phys | (pc & ~TARGET_PAGE_MASK));
527 }
528 }
529 #endif
530 #endif /* TARGET_HAS_ICE */
531
532 #if defined(CONFIG_USER_ONLY)
533 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
534
535 {
536 }
537
538 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
539 int flags, CPUWatchpoint **watchpoint)
540 {
541 return -ENOSYS;
542 }
543 #else
544 /* Add a watchpoint. */
545 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
546 int flags, CPUWatchpoint **watchpoint)
547 {
548 vaddr len_mask = ~(len - 1);
549 CPUWatchpoint *wp;
550
551 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
552 if ((len & (len - 1)) || (addr & ~len_mask) ||
553 len == 0 || len > TARGET_PAGE_SIZE) {
554 error_report("tried to set invalid watchpoint at %"
555 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
556 return -EINVAL;
557 }
558 wp = g_malloc(sizeof(*wp));
559
560 wp->vaddr = addr;
561 wp->len_mask = len_mask;
562 wp->flags = flags;
563
564 /* keep all GDB-injected watchpoints in front */
565 if (flags & BP_GDB) {
566 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
567 } else {
568 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 }
570
571 tlb_flush_page(cpu, addr);
572
573 if (watchpoint)
574 *watchpoint = wp;
575 return 0;
576 }
577
578 /* Remove a specific watchpoint. */
579 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
580 int flags)
581 {
582 vaddr len_mask = ~(len - 1);
583 CPUWatchpoint *wp;
584
585 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
586 if (addr == wp->vaddr && len_mask == wp->len_mask
587 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
588 cpu_watchpoint_remove_by_ref(cpu, wp);
589 return 0;
590 }
591 }
592 return -ENOENT;
593 }
594
595 /* Remove a specific watchpoint by reference. */
596 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
597 {
598 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
599
600 tlb_flush_page(cpu, watchpoint->vaddr);
601
602 g_free(watchpoint);
603 }
604
605 /* Remove all matching watchpoints. */
606 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
607 {
608 CPUWatchpoint *wp, *next;
609
610 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
611 if (wp->flags & mask) {
612 cpu_watchpoint_remove_by_ref(cpu, wp);
613 }
614 }
615 }
616 #endif
617
618 /* Add a breakpoint. */
619 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
620 CPUBreakpoint **breakpoint)
621 {
622 #if defined(TARGET_HAS_ICE)
623 CPUBreakpoint *bp;
624
625 bp = g_malloc(sizeof(*bp));
626
627 bp->pc = pc;
628 bp->flags = flags;
629
630 /* keep all GDB-injected breakpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
633 } else {
634 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
635 }
636
637 breakpoint_invalidate(cpu, pc);
638
639 if (breakpoint) {
640 *breakpoint = bp;
641 }
642 return 0;
643 #else
644 return -ENOSYS;
645 #endif
646 }
647
648 /* Remove a specific breakpoint. */
649 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
650 {
651 #if defined(TARGET_HAS_ICE)
652 CPUBreakpoint *bp;
653
654 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
655 if (bp->pc == pc && bp->flags == flags) {
656 cpu_breakpoint_remove_by_ref(cpu, bp);
657 return 0;
658 }
659 }
660 return -ENOENT;
661 #else
662 return -ENOSYS;
663 #endif
664 }
665
666 /* Remove a specific breakpoint by reference. */
667 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
668 {
669 #if defined(TARGET_HAS_ICE)
670 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
671
672 breakpoint_invalidate(cpu, breakpoint->pc);
673
674 g_free(breakpoint);
675 #endif
676 }
677
678 /* Remove all matching breakpoints. */
679 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
680 {
681 #if defined(TARGET_HAS_ICE)
682 CPUBreakpoint *bp, *next;
683
684 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
685 if (bp->flags & mask) {
686 cpu_breakpoint_remove_by_ref(cpu, bp);
687 }
688 }
689 #endif
690 }
691
692 /* enable or disable single step mode. EXCP_DEBUG is returned by the
693 CPU loop after each instruction */
694 void cpu_single_step(CPUState *cpu, int enabled)
695 {
696 #if defined(TARGET_HAS_ICE)
697 if (cpu->singlestep_enabled != enabled) {
698 cpu->singlestep_enabled = enabled;
699 if (kvm_enabled()) {
700 kvm_update_guest_debug(cpu, 0);
701 } else {
702 /* must flush all the translated code to avoid inconsistencies */
703 /* XXX: only flush what is necessary */
704 CPUArchState *env = cpu->env_ptr;
705 tb_flush(env);
706 }
707 }
708 #endif
709 }
710
711 void cpu_abort(CPUState *cpu, const char *fmt, ...)
712 {
713 va_list ap;
714 va_list ap2;
715
716 va_start(ap, fmt);
717 va_copy(ap2, ap);
718 fprintf(stderr, "qemu: fatal: ");
719 vfprintf(stderr, fmt, ap);
720 fprintf(stderr, "\n");
721 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 if (qemu_log_enabled()) {
723 qemu_log("qemu: fatal: ");
724 qemu_log_vprintf(fmt, ap2);
725 qemu_log("\n");
726 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
727 qemu_log_flush();
728 qemu_log_close();
729 }
730 va_end(ap2);
731 va_end(ap);
732 #if defined(CONFIG_USER_ONLY)
733 {
734 struct sigaction act;
735 sigfillset(&act.sa_mask);
736 act.sa_handler = SIG_DFL;
737 sigaction(SIGABRT, &act, NULL);
738 }
739 #endif
740 abort();
741 }
742
743 #if !defined(CONFIG_USER_ONLY)
744 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
745 {
746 RAMBlock *block;
747
748 /* The list is protected by the iothread lock here. */
749 block = ram_list.mru_block;
750 if (block && addr - block->offset < block->length) {
751 goto found;
752 }
753 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
754 if (addr - block->offset < block->length) {
755 goto found;
756 }
757 }
758
759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
760 abort();
761
762 found:
763 ram_list.mru_block = block;
764 return block;
765 }
766
767 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
768 {
769 ram_addr_t start1;
770 RAMBlock *block;
771 ram_addr_t end;
772
773 end = TARGET_PAGE_ALIGN(start + length);
774 start &= TARGET_PAGE_MASK;
775
776 block = qemu_get_ram_block(start);
777 assert(block == qemu_get_ram_block(end - 1));
778 start1 = (uintptr_t)block->host + (start - block->offset);
779 cpu_tlb_reset_dirty_all(start1, length);
780 }
781
782 /* Note: start and end must be within the same ram block. */
783 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
784 unsigned client)
785 {
786 if (length == 0)
787 return;
788 cpu_physical_memory_clear_dirty_range(start, length, client);
789
790 if (tcg_enabled()) {
791 tlb_reset_dirty_range_all(start, length);
792 }
793 }
794
795 static void cpu_physical_memory_set_dirty_tracking(bool enable)
796 {
797 in_migration = enable;
798 }
799
800 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
801 MemoryRegionSection *section,
802 target_ulong vaddr,
803 hwaddr paddr, hwaddr xlat,
804 int prot,
805 target_ulong *address)
806 {
807 hwaddr iotlb;
808 CPUWatchpoint *wp;
809
810 if (memory_region_is_ram(section->mr)) {
811 /* Normal RAM. */
812 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
813 + xlat;
814 if (!section->readonly) {
815 iotlb |= PHYS_SECTION_NOTDIRTY;
816 } else {
817 iotlb |= PHYS_SECTION_ROM;
818 }
819 } else {
820 iotlb = section - section->address_space->dispatch->map.sections;
821 iotlb += xlat;
822 }
823
824 /* Make accesses to pages with watchpoints go via the
825 watchpoint trap routines. */
826 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
827 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
828 /* Avoid trapping reads of pages with a write breakpoint. */
829 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
830 iotlb = PHYS_SECTION_WATCH + paddr;
831 *address |= TLB_MMIO;
832 break;
833 }
834 }
835 }
836
837 return iotlb;
838 }
#endif /* !defined(CONFIG_USER_ONLY) */
840
841 #if !defined(CONFIG_USER_ONLY)
842
/* Subpage helpers, used by register_subpage(). */
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

/* Allocator for guest RAM; replaceable through phys_mem_set_alloc(). */
static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
848
849 /*
850 * Set a custom physical guest memory alloator.
851 * Accelerators with unusual needs may need this. Hopefully, we can
852 * get rid of it eventually.
853 */
854 void phys_mem_set_alloc(void *(*alloc)(size_t))
855 {
856 phys_mem_alloc = alloc;
857 }
858
859 static uint16_t phys_section_add(PhysPageMap *map,
860 MemoryRegionSection *section)
861 {
862 /* The physical section number is ORed with a page-aligned
863 * pointer to produce the iotlb entries. Thus it should
864 * never overflow into the page-aligned value.
865 */
866 assert(map->sections_nb < TARGET_PAGE_SIZE);
867
868 if (map->sections_nb == map->sections_nb_alloc) {
869 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
870 map->sections = g_renew(MemoryRegionSection, map->sections,
871 map->sections_nb_alloc);
872 }
873 map->sections[map->sections_nb] = *section;
874 memory_region_ref(section->mr);
875 return map->sections_nb++;
876 }
877
878 static void phys_section_destroy(MemoryRegion *mr)
879 {
880 memory_region_unref(mr);
881
882 if (mr->subpage) {
883 subpage_t *subpage = container_of(mr, subpage_t, iomem);
884 memory_region_destroy(&subpage->iomem);
885 g_free(subpage);
886 }
887 }
888
889 static void phys_sections_free(PhysPageMap *map)
890 {
891 while (map->sections_nb > 0) {
892 MemoryRegionSection *section = &map->sections[--map->sections_nb];
893 phys_section_destroy(section->mr);
894 }
895 g_free(map->sections);
896 g_free(map->nodes);
897 }
898
899 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
900 {
901 subpage_t *subpage;
902 hwaddr base = section->offset_within_address_space
903 & TARGET_PAGE_MASK;
904 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
905 d->map.nodes, d->map.sections);
906 MemoryRegionSection subsection = {
907 .offset_within_address_space = base,
908 .size = int128_make64(TARGET_PAGE_SIZE),
909 };
910 hwaddr start, end;
911
912 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
913
914 if (!(existing->mr->subpage)) {
915 subpage = subpage_init(d->as, base);
916 subsection.address_space = d->as;
917 subsection.mr = &subpage->iomem;
918 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
919 phys_section_add(&d->map, &subsection));
920 } else {
921 subpage = container_of(existing->mr, subpage_t, iomem);
922 }
923 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
924 end = start + int128_get64(section->size) - 1;
925 subpage_register(subpage, start, end,
926 phys_section_add(&d->map, section));
927 }
928
929
930 static void register_multipage(AddressSpaceDispatch *d,
931 MemoryRegionSection *section)
932 {
933 hwaddr start_addr = section->offset_within_address_space;
934 uint16_t section_index = phys_section_add(&d->map, section);
935 uint64_t num_pages = int128_get64(int128_rshift(section->size,
936 TARGET_PAGE_BITS));
937
938 assert(num_pages);
939 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
940 }
941
942 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
943 {
944 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
945 AddressSpaceDispatch *d = as->next_dispatch;
946 MemoryRegionSection now = *section, remain = *section;
947 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
948
949 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
950 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
951 - now.offset_within_address_space;
952
953 now.size = int128_min(int128_make64(left), now.size);
954 register_subpage(d, &now);
955 } else {
956 now.size = int128_zero();
957 }
958 while (int128_ne(remain.size, now.size)) {
959 remain.size = int128_sub(remain.size, now.size);
960 remain.offset_within_address_space += int128_get64(now.size);
961 remain.offset_within_region += int128_get64(now.size);
962 now = remain;
963 if (int128_lt(remain.size, page_size)) {
964 register_subpage(d, &now);
965 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
966 now.size = page_size;
967 register_subpage(d, &now);
968 } else {
969 now.size = int128_and(now.size, int128_neg(page_size));
970 register_multipage(d, &now);
971 }
972 }
973 }
974
/* Flush the coalesced MMIO buffer; only does anything when KVM is
 * enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
980
981 void qemu_mutex_lock_ramlist(void)
982 {
983 qemu_mutex_lock(&ram_list.mutex);
984 }
985
986 void qemu_mutex_unlock_ramlist(void)
987 {
988 qemu_mutex_unlock(&ram_list.mutex);
989 }
990
991 #ifdef __linux__
992
993 #include <sys/vfs.h>
994
995 #define HUGETLBFS_MAGIC 0x958458f6
996
997 static long gethugepagesize(const char *path)
998 {
999 struct statfs fs;
1000 int ret;
1001
1002 do {
1003 ret = statfs(path, &fs);
1004 } while (ret != 0 && errno == EINTR);
1005
1006 if (ret != 0) {
1007 perror(path);
1008 return 0;
1009 }
1010
1011 if (fs.f_type != HUGETLBFS_MAGIC)
1012 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1013
1014 return fs.f_bsize;
1015 }
1016
1017 static void *file_ram_alloc(RAMBlock *block,
1018 ram_addr_t memory,
1019 const char *path,
1020 Error **errp)
1021 {
1022 char *filename;
1023 char *sanitized_name;
1024 char *c;
1025 void *area;
1026 int fd;
1027 unsigned long hpagesize;
1028
1029 hpagesize = gethugepagesize(path);
1030 if (!hpagesize) {
1031 goto error;
1032 }
1033
1034 if (memory < hpagesize) {
1035 return NULL;
1036 }
1037
1038 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1039 error_setg(errp,
1040 "host lacks kvm mmu notifiers, -mem-path unsupported");
1041 goto error;
1042 }
1043
1044 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1045 sanitized_name = g_strdup(block->mr->name);
1046 for (c = sanitized_name; *c != '\0'; c++) {
1047 if (*c == '/')
1048 *c = '_';
1049 }
1050
1051 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1052 sanitized_name);
1053 g_free(sanitized_name);
1054
1055 fd = mkstemp(filename);
1056 if (fd < 0) {
1057 error_setg_errno(errp, errno,
1058 "unable to create backing store for hugepages");
1059 g_free(filename);
1060 goto error;
1061 }
1062 unlink(filename);
1063 g_free(filename);
1064
1065 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1066
1067 /*
1068 * ftruncate is not supported by hugetlbfs in older
1069 * hosts, so don't bother bailing out on errors.
1070 * If anything goes wrong with it under other filesystems,
1071 * mmap will fail.
1072 */
1073 if (ftruncate(fd, memory)) {
1074 perror("ftruncate");
1075 }
1076
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 error_setg_errno(errp, errno,
1080 "unable to map backing store for hugepages");
1081 close(fd);
1082 goto error;
1083 }
1084
1085 if (mem_prealloc) {
1086 os_mem_prealloc(fd, area, memory);
1087 }
1088
1089 block->fd = fd;
1090 return area;
1091
1092 error:
1093 if (mem_prealloc) {
1094 exit(1);
1095 }
1096 return NULL;
1097 }
1098 #endif
1099
1100 static ram_addr_t find_ram_offset(ram_addr_t size)
1101 {
1102 RAMBlock *block, *next_block;
1103 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1104
1105 assert(size != 0); /* it would hand out same offset multiple times */
1106
1107 if (QTAILQ_EMPTY(&ram_list.blocks))
1108 return 0;
1109
1110 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1111 ram_addr_t end, next = RAM_ADDR_MAX;
1112
1113 end = block->offset + block->length;
1114
1115 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1116 if (next_block->offset >= end) {
1117 next = MIN(next, next_block->offset);
1118 }
1119 }
1120 if (next - end >= size && next - end < mingap) {
1121 offset = end;
1122 mingap = next - end;
1123 }
1124 }
1125
1126 if (offset == RAM_ADDR_MAX) {
1127 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1128 (uint64_t)size);
1129 abort();
1130 }
1131
1132 return offset;
1133 }
1134
1135 ram_addr_t last_ram_offset(void)
1136 {
1137 RAMBlock *block;
1138 ram_addr_t last = 0;
1139
1140 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1141 last = MAX(last, block->offset + block->length);
1142
1143 return last;
1144 }
1145
1146 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1147 {
1148 int ret;
1149
1150 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1151 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1152 "dump-guest-core", true)) {
1153 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1154 if (ret) {
1155 perror("qemu_madvise");
1156 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1157 "but dump_guest_core=off specified\n");
1158 }
1159 }
1160 }
1161
1162 static RAMBlock *find_ram_block(ram_addr_t addr)
1163 {
1164 RAMBlock *block;
1165
1166 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1167 if (block->offset == addr) {
1168 return block;
1169 }
1170 }
1171
1172 return NULL;
1173 }
1174
1175 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1176 {
1177 RAMBlock *new_block = find_ram_block(addr);
1178 RAMBlock *block;
1179
1180 assert(new_block);
1181 assert(!new_block->idstr[0]);
1182
1183 if (dev) {
1184 char *id = qdev_get_dev_path(dev);
1185 if (id) {
1186 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1187 g_free(id);
1188 }
1189 }
1190 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1191
1192 /* This assumes the iothread lock is taken here too. */
1193 qemu_mutex_lock_ramlist();
1194 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1195 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1196 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1197 new_block->idstr);
1198 abort();
1199 }
1200 }
1201 qemu_mutex_unlock_ramlist();
1202 }
1203
1204 void qemu_ram_unset_idstr(ram_addr_t addr)
1205 {
1206 RAMBlock *block = find_ram_block(addr);
1207
1208 if (block) {
1209 memset(block->idstr, 0, sizeof(block->idstr));
1210 }
1211 }
1212
1213 static int memory_try_enable_merging(void *addr, size_t len)
1214 {
1215 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1216 /* disabled by the user */
1217 return 0;
1218 }
1219
1220 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1221 }
1222
1223 static ram_addr_t ram_block_add(RAMBlock *new_block)
1224 {
1225 RAMBlock *block;
1226 ram_addr_t old_ram_size, new_ram_size;
1227
1228 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1229
1230 /* This assumes the iothread lock is taken here too. */
1231 qemu_mutex_lock_ramlist();
1232 new_block->offset = find_ram_offset(new_block->length);
1233
1234 if (!new_block->host) {
1235 if (xen_enabled()) {
1236 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1237 } else {
1238 new_block->host = phys_mem_alloc(new_block->length);
1239 if (!new_block->host) {
1240 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1241 new_block->mr->name, strerror(errno));
1242 exit(1);
1243 }
1244 memory_try_enable_merging(new_block->host, new_block->length);
1245 }
1246 }
1247
1248 /* Keep the list sorted from biggest to smallest block. */
1249 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1250 if (block->length < new_block->length) {
1251 break;
1252 }
1253 }
1254 if (block) {
1255 QTAILQ_INSERT_BEFORE(block, new_block, next);
1256 } else {
1257 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1258 }
1259 ram_list.mru_block = NULL;
1260
1261 ram_list.version++;
1262 qemu_mutex_unlock_ramlist();
1263
1264 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1265
1266 if (new_ram_size > old_ram_size) {
1267 int i;
1268 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1269 ram_list.dirty_memory[i] =
1270 bitmap_zero_extend(ram_list.dirty_memory[i],
1271 old_ram_size, new_ram_size);
1272 }
1273 }
1274 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1275
1276 qemu_ram_setup_dump(new_block->host, new_block->length);
1277 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1278 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1279
1280 if (kvm_enabled()) {
1281 kvm_setup_guest_memory(new_block->host, new_block->length);
1282 }
1283
1284 return new_block->offset;
1285 }
1286
1287 #ifdef __linux__
1288 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1289 const char *mem_path,
1290 Error **errp)
1291 {
1292 RAMBlock *new_block;
1293
1294 if (xen_enabled()) {
1295 error_setg(errp, "-mem-path not supported with Xen");
1296 return -1;
1297 }
1298
1299 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1300 /*
1301 * file_ram_alloc() needs to allocate just like
1302 * phys_mem_alloc, but we haven't bothered to provide
1303 * a hook there.
1304 */
1305 error_setg(errp,
1306 "-mem-path not supported with this accelerator");
1307 return -1;
1308 }
1309
1310 size = TARGET_PAGE_ALIGN(size);
1311 new_block = g_malloc0(sizeof(*new_block));
1312 new_block->mr = mr;
1313 new_block->length = size;
1314 new_block->host = file_ram_alloc(new_block, size,
1315 mem_path, errp);
1316 if (!new_block->host) {
1317 g_free(new_block);
1318 return -1;
1319 }
1320
1321 return ram_block_add(new_block);
1322 }
1323 #endif
1324
1325 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1326 MemoryRegion *mr)
1327 {
1328 RAMBlock *new_block;
1329
1330 size = TARGET_PAGE_ALIGN(size);
1331 new_block = g_malloc0(sizeof(*new_block));
1332 new_block->mr = mr;
1333 new_block->length = size;
1334 new_block->fd = -1;
1335 new_block->host = host;
1336 if (host) {
1337 new_block->flags |= RAM_PREALLOC;
1338 }
1339 return ram_block_add(new_block);
1340 }
1341
1342 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1343 {
1344 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1345 }
1346
1347 void qemu_ram_free_from_ptr(ram_addr_t addr)
1348 {
1349 RAMBlock *block;
1350
1351 /* This assumes the iothread lock is taken here too. */
1352 qemu_mutex_lock_ramlist();
1353 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1354 if (addr == block->offset) {
1355 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1356 ram_list.mru_block = NULL;
1357 ram_list.version++;
1358 g_free(block);
1359 break;
1360 }
1361 }
1362 qemu_mutex_unlock_ramlist();
1363 }
1364
1365 void qemu_ram_free(ram_addr_t addr)
1366 {
1367 RAMBlock *block;
1368
1369 /* This assumes the iothread lock is taken here too. */
1370 qemu_mutex_lock_ramlist();
1371 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1372 if (addr == block->offset) {
1373 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1374 ram_list.mru_block = NULL;
1375 ram_list.version++;
1376 if (block->flags & RAM_PREALLOC) {
1377 ;
1378 } else if (xen_enabled()) {
1379 xen_invalidate_map_cache_entry(block->host);
1380 #ifndef _WIN32
1381 } else if (block->fd >= 0) {
1382 munmap(block->host, block->length);
1383 close(block->fd);
1384 #endif
1385 } else {
1386 qemu_anon_ram_free(block->host, block->length);
1387 }
1388 g_free(block);
1389 break;
1390 }
1391 }
1392 qemu_mutex_unlock_ramlist();
1393
1394 }
1395
1396 #ifndef _WIN32
1397 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1398 {
1399 RAMBlock *block;
1400 ram_addr_t offset;
1401 int flags;
1402 void *area, *vaddr;
1403
1404 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1405 offset = addr - block->offset;
1406 if (offset < block->length) {
1407 vaddr = block->host + offset;
1408 if (block->flags & RAM_PREALLOC) {
1409 ;
1410 } else if (xen_enabled()) {
1411 abort();
1412 } else {
1413 flags = MAP_FIXED;
1414 munmap(vaddr, length);
1415 if (block->fd >= 0) {
1416 #ifdef MAP_POPULATE
1417 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1418 MAP_PRIVATE;
1419 #else
1420 flags |= MAP_PRIVATE;
1421 #endif
1422 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1423 flags, block->fd, offset);
1424 } else {
1425 /*
1426 * Remap needs to match alloc. Accelerators that
1427 * set phys_mem_alloc never remap. If they did,
1428 * we'd need a remap hook here.
1429 */
1430 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1431
1432 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1433 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1434 flags, -1, 0);
1435 }
1436 if (area != vaddr) {
1437 fprintf(stderr, "Could not remap addr: "
1438 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1439 length, addr);
1440 exit(1);
1441 }
1442 memory_try_enable_merging(vaddr, length);
1443 qemu_ram_setup_dump(vaddr, length);
1444 }
1445 return;
1446 }
1447 }
1448 }
1449 #endif /* !_WIN32 */
1450
1451 int qemu_get_ram_fd(ram_addr_t addr)
1452 {
1453 RAMBlock *block = qemu_get_ram_block(addr);
1454
1455 return block->fd;
1456 }
1457
1458 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1459 With the exception of the softmmu code in this file, this should
1460 only be used for local memory (e.g. video ram) that the device owns,
1461 and knows it isn't going to access beyond the end of the block.
1462
1463 It should not be used for general purpose DMA.
1464 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1465 */
1466 void *qemu_get_ram_ptr(ram_addr_t addr)
1467 {
1468 RAMBlock *block = qemu_get_ram_block(addr);
1469
1470 if (xen_enabled()) {
1471 /* We need to check if the requested address is in the RAM
1472 * because we don't want to map the entire memory in QEMU.
1473 * In that case just map until the end of the page.
1474 */
1475 if (block->offset == 0) {
1476 return xen_map_cache(addr, 0, 0);
1477 } else if (block->host == NULL) {
1478 block->host =
1479 xen_map_cache(block->offset, block->length, 1);
1480 }
1481 }
1482 return block->host + (addr - block->offset);
1483 }
1484
1485 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1486 * but takes a size argument */
1487 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1488 {
1489 if (*size == 0) {
1490 return NULL;
1491 }
1492 if (xen_enabled()) {
1493 return xen_map_cache(addr, *size, 1);
1494 } else {
1495 RAMBlock *block;
1496
1497 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1498 if (addr - block->offset < block->length) {
1499 if (addr - block->offset + *size > block->length)
1500 *size = block->length - addr + block->offset;
1501 return block->host + (addr - block->offset);
1502 }
1503 }
1504
1505 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1506 abort();
1507 }
1508 }
1509
1510 /* Some of the softmmu routines need to translate from a host pointer
1511 (typically a TLB entry) back to a ram offset. */
1512 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1513 {
1514 RAMBlock *block;
1515 uint8_t *host = ptr;
1516
1517 if (xen_enabled()) {
1518 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1519 return qemu_get_ram_block(*ram_addr)->mr;
1520 }
1521
1522 block = ram_list.mru_block;
1523 if (block && block->host && host - block->host < block->length) {
1524 goto found;
1525 }
1526
1527 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1528 /* This case append when the block is not mapped. */
1529 if (block->host == NULL) {
1530 continue;
1531 }
1532 if (host - block->host < block->length) {
1533 goto found;
1534 }
1535 }
1536
1537 return NULL;
1538
1539 found:
1540 *ram_addr = block->offset + (host - block->host);
1541 return block->mr;
1542 }
1543
1544 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1545 uint64_t val, unsigned size)
1546 {
1547 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1548 tb_invalidate_phys_page_fast(ram_addr, size);
1549 }
1550 switch (size) {
1551 case 1:
1552 stb_p(qemu_get_ram_ptr(ram_addr), val);
1553 break;
1554 case 2:
1555 stw_p(qemu_get_ram_ptr(ram_addr), val);
1556 break;
1557 case 4:
1558 stl_p(qemu_get_ram_ptr(ram_addr), val);
1559 break;
1560 default:
1561 abort();
1562 }
1563 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1564 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1565 /* we remove the notdirty callback only if the code has been
1566 flushed */
1567 if (!cpu_physical_memory_is_clean(ram_addr)) {
1568 CPUArchState *env = current_cpu->env_ptr;
1569 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1570 }
1571 }
1572
1573 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1574 unsigned size, bool is_write)
1575 {
1576 return is_write;
1577 }
1578
1579 static const MemoryRegionOps notdirty_mem_ops = {
1580 .write = notdirty_mem_write,
1581 .valid.accepts = notdirty_mem_accepts,
1582 .endianness = DEVICE_NATIVE_ENDIAN,
1583 };
1584
1585 /* Generate a debug exception if a watchpoint has been hit. */
1586 static void check_watchpoint(int offset, int len_mask, int flags)
1587 {
1588 CPUState *cpu = current_cpu;
1589 CPUArchState *env = cpu->env_ptr;
1590 target_ulong pc, cs_base;
1591 target_ulong vaddr;
1592 CPUWatchpoint *wp;
1593 int cpu_flags;
1594
1595 if (cpu->watchpoint_hit) {
1596 /* We re-entered the check after replacing the TB. Now raise
1597 * the debug interrupt so that is will trigger after the
1598 * current instruction. */
1599 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1600 return;
1601 }
1602 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1603 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1604 if ((vaddr == (wp->vaddr & len_mask) ||
1605 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1606 wp->flags |= BP_WATCHPOINT_HIT;
1607 if (!cpu->watchpoint_hit) {
1608 cpu->watchpoint_hit = wp;
1609 tb_check_watchpoint(cpu);
1610 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1611 cpu->exception_index = EXCP_DEBUG;
1612 cpu_loop_exit(cpu);
1613 } else {
1614 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1615 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1616 cpu_resume_from_signal(cpu, NULL);
1617 }
1618 }
1619 } else {
1620 wp->flags &= ~BP_WATCHPOINT_HIT;
1621 }
1622 }
1623 }
1624
1625 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1626 so these check for a hit then pass through to the normal out-of-line
1627 phys routines. */
1628 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1629 unsigned size)
1630 {
1631 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1632 switch (size) {
1633 case 1: return ldub_phys(&address_space_memory, addr);
1634 case 2: return lduw_phys(&address_space_memory, addr);
1635 case 4: return ldl_phys(&address_space_memory, addr);
1636 default: abort();
1637 }
1638 }
1639
1640 static void watch_mem_write(void *opaque, hwaddr addr,
1641 uint64_t val, unsigned size)
1642 {
1643 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1644 switch (size) {
1645 case 1:
1646 stb_phys(&address_space_memory, addr, val);
1647 break;
1648 case 2:
1649 stw_phys(&address_space_memory, addr, val);
1650 break;
1651 case 4:
1652 stl_phys(&address_space_memory, addr, val);
1653 break;
1654 default: abort();
1655 }
1656 }
1657
1658 static const MemoryRegionOps watch_mem_ops = {
1659 .read = watch_mem_read,
1660 .write = watch_mem_write,
1661 .endianness = DEVICE_NATIVE_ENDIAN,
1662 };
1663
1664 static uint64_t subpage_read(void *opaque, hwaddr addr,
1665 unsigned len)
1666 {
1667 subpage_t *subpage = opaque;
1668 uint8_t buf[4];
1669
1670 #if defined(DEBUG_SUBPAGE)
1671 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1672 subpage, len, addr);
1673 #endif
1674 address_space_read(subpage->as, addr + subpage->base, buf, len);
1675 switch (len) {
1676 case 1:
1677 return ldub_p(buf);
1678 case 2:
1679 return lduw_p(buf);
1680 case 4:
1681 return ldl_p(buf);
1682 default:
1683 abort();
1684 }
1685 }
1686
1687 static void subpage_write(void *opaque, hwaddr addr,
1688 uint64_t value, unsigned len)
1689 {
1690 subpage_t *subpage = opaque;
1691 uint8_t buf[4];
1692
1693 #if defined(DEBUG_SUBPAGE)
1694 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1695 " value %"PRIx64"\n",
1696 __func__, subpage, len, addr, value);
1697 #endif
1698 switch (len) {
1699 case 1:
1700 stb_p(buf, value);
1701 break;
1702 case 2:
1703 stw_p(buf, value);
1704 break;
1705 case 4:
1706 stl_p(buf, value);
1707 break;
1708 default:
1709 abort();
1710 }
1711 address_space_write(subpage->as, addr + subpage->base, buf, len);
1712 }
1713
1714 static bool subpage_accepts(void *opaque, hwaddr addr,
1715 unsigned len, bool is_write)
1716 {
1717 subpage_t *subpage = opaque;
1718 #if defined(DEBUG_SUBPAGE)
1719 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1720 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1721 #endif
1722
1723 return address_space_access_valid(subpage->as, addr + subpage->base,
1724 len, is_write);
1725 }
1726
1727 static const MemoryRegionOps subpage_ops = {
1728 .read = subpage_read,
1729 .write = subpage_write,
1730 .valid.accepts = subpage_accepts,
1731 .endianness = DEVICE_NATIVE_ENDIAN,
1732 };
1733
1734 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1735 uint16_t section)
1736 {
1737 int idx, eidx;
1738
1739 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1740 return -1;
1741 idx = SUBPAGE_IDX(start);
1742 eidx = SUBPAGE_IDX(end);
1743 #if defined(DEBUG_SUBPAGE)
1744 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1745 __func__, mmio, start, end, idx, eidx, section);
1746 #endif
1747 for (; idx <= eidx; idx++) {
1748 mmio->sub_section[idx] = section;
1749 }
1750
1751 return 0;
1752 }
1753
1754 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1755 {
1756 subpage_t *mmio;
1757
1758 mmio = g_malloc0(sizeof(subpage_t));
1759
1760 mmio->as = as;
1761 mmio->base = base;
1762 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1763 "subpage", TARGET_PAGE_SIZE);
1764 mmio->iomem.subpage = true;
1765 #if defined(DEBUG_SUBPAGE)
1766 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1767 mmio, base, TARGET_PAGE_SIZE);
1768 #endif
1769 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1770
1771 return mmio;
1772 }
1773
1774 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1775 MemoryRegion *mr)
1776 {
1777 assert(as);
1778 MemoryRegionSection section = {
1779 .address_space = as,
1780 .mr = mr,
1781 .offset_within_address_space = 0,
1782 .offset_within_region = 0,
1783 .size = int128_2_64(),
1784 };
1785
1786 return phys_section_add(map, &section);
1787 }
1788
1789 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1790 {
1791 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1792 }
1793
1794 static void io_mem_init(void)
1795 {
1796 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1797 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1798 "unassigned", UINT64_MAX);
1799 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1800 "notdirty", UINT64_MAX);
1801 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1802 "watch", UINT64_MAX);
1803 }
1804
1805 static void mem_begin(MemoryListener *listener)
1806 {
1807 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1808 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1809 uint16_t n;
1810
1811 n = dummy_section(&d->map, as, &io_mem_unassigned);
1812 assert(n == PHYS_SECTION_UNASSIGNED);
1813 n = dummy_section(&d->map, as, &io_mem_notdirty);
1814 assert(n == PHYS_SECTION_NOTDIRTY);
1815 n = dummy_section(&d->map, as, &io_mem_rom);
1816 assert(n == PHYS_SECTION_ROM);
1817 n = dummy_section(&d->map, as, &io_mem_watch);
1818 assert(n == PHYS_SECTION_WATCH);
1819
1820 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1821 d->as = as;
1822 as->next_dispatch = d;
1823 }
1824
1825 static void mem_commit(MemoryListener *listener)
1826 {
1827 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1828 AddressSpaceDispatch *cur = as->dispatch;
1829 AddressSpaceDispatch *next = as->next_dispatch;
1830
1831 phys_page_compact_all(next, next->map.nodes_nb);
1832
1833 as->dispatch = next;
1834
1835 if (cur) {
1836 phys_sections_free(&cur->map);
1837 g_free(cur);
1838 }
1839 }
1840
1841 static void tcg_commit(MemoryListener *listener)
1842 {
1843 CPUState *cpu;
1844
1845 /* since each CPU stores ram addresses in its TLB cache, we must
1846 reset the modified entries */
1847 /* XXX: slow ! */
1848 CPU_FOREACH(cpu) {
1849 /* FIXME: Disentangle the cpu.h circular files deps so we can
1850 directly get the right CPU from listener. */
1851 if (cpu->tcg_as_listener != listener) {
1852 continue;
1853 }
1854 tlb_flush(cpu, 1);
1855 }
1856 }
1857
1858 static void core_log_global_start(MemoryListener *listener)
1859 {
1860 cpu_physical_memory_set_dirty_tracking(true);
1861 }
1862
1863 static void core_log_global_stop(MemoryListener *listener)
1864 {
1865 cpu_physical_memory_set_dirty_tracking(false);
1866 }
1867
1868 static MemoryListener core_memory_listener = {
1869 .log_global_start = core_log_global_start,
1870 .log_global_stop = core_log_global_stop,
1871 .priority = 1,
1872 };
1873
1874 void address_space_init_dispatch(AddressSpace *as)
1875 {
1876 as->dispatch = NULL;
1877 as->dispatch_listener = (MemoryListener) {
1878 .begin = mem_begin,
1879 .commit = mem_commit,
1880 .region_add = mem_add,
1881 .region_nop = mem_add,
1882 .priority = 0,
1883 };
1884 memory_listener_register(&as->dispatch_listener, as);
1885 }
1886
1887 void address_space_destroy_dispatch(AddressSpace *as)
1888 {
1889 AddressSpaceDispatch *d = as->dispatch;
1890
1891 memory_listener_unregister(&as->dispatch_listener);
1892 g_free(d);
1893 as->dispatch = NULL;
1894 }
1895
1896 static void memory_map_init(void)
1897 {
1898 system_memory = g_malloc(sizeof(*system_memory));
1899
1900 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1901 address_space_init(&address_space_memory, system_memory, "memory");
1902
1903 system_io = g_malloc(sizeof(*system_io));
1904 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1905 65536);
1906 address_space_init(&address_space_io, system_io, "I/O");
1907
1908 memory_listener_register(&core_memory_listener, &address_space_memory);
1909 }
1910
1911 MemoryRegion *get_system_memory(void)
1912 {
1913 return system_memory;
1914 }
1915
1916 MemoryRegion *get_system_io(void)
1917 {
1918 return system_io;
1919 }
1920
1921 #endif /* !defined(CONFIG_USER_ONLY) */
1922
1923 /* physical memory access (slow version, mainly for debug) */
1924 #if defined(CONFIG_USER_ONLY)
1925 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1926 uint8_t *buf, int len, int is_write)
1927 {
1928 int l, flags;
1929 target_ulong page;
1930 void * p;
1931
1932 while (len > 0) {
1933 page = addr & TARGET_PAGE_MASK;
1934 l = (page + TARGET_PAGE_SIZE) - addr;
1935 if (l > len)
1936 l = len;
1937 flags = page_get_flags(page);
1938 if (!(flags & PAGE_VALID))
1939 return -1;
1940 if (is_write) {
1941 if (!(flags & PAGE_WRITE))
1942 return -1;
1943 /* XXX: this code should not depend on lock_user */
1944 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1945 return -1;
1946 memcpy(p, buf, l);
1947 unlock_user(p, addr, l);
1948 } else {
1949 if (!(flags & PAGE_READ))
1950 return -1;
1951 /* XXX: this code should not depend on lock_user */
1952 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1953 return -1;
1954 memcpy(buf, p, l);
1955 unlock_user(p, addr, 0);
1956 }
1957 len -= l;
1958 buf += l;
1959 addr += l;
1960 }
1961 return 0;
1962 }
1963
1964 #else
1965
1966 static void invalidate_and_set_dirty(hwaddr addr,
1967 hwaddr length)
1968 {
1969 if (cpu_physical_memory_is_clean(addr)) {
1970 /* invalidate code */
1971 tb_invalidate_phys_page_range(addr, addr + length, 0);
1972 /* set dirty bit */
1973 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1974 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1975 }
1976 xen_modified_memory(addr, length);
1977 }
1978
1979 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1980 {
1981 unsigned access_size_max = mr->ops->valid.max_access_size;
1982
1983 /* Regions are assumed to support 1-4 byte accesses unless
1984 otherwise specified. */
1985 if (access_size_max == 0) {
1986 access_size_max = 4;
1987 }
1988
1989 /* Bound the maximum access by the alignment of the address. */
1990 if (!mr->ops->impl.unaligned) {
1991 unsigned align_size_max = addr & -addr;
1992 if (align_size_max != 0 && align_size_max < access_size_max) {
1993 access_size_max = align_size_max;
1994 }
1995 }
1996
1997 /* Don't attempt accesses larger than the maximum. */
1998 if (l > access_size_max) {
1999 l = access_size_max;
2000 }
2001 if (l & (l - 1)) {
2002 l = 1 << (qemu_fls(l) - 1);
2003 }
2004
2005 return l;
2006 }
2007
2008 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2009 int len, bool is_write)
2010 {
2011 hwaddr l;
2012 uint8_t *ptr;
2013 uint64_t val;
2014 hwaddr addr1;
2015 MemoryRegion *mr;
2016 bool error = false;
2017
2018 while (len > 0) {
2019 l = len;
2020 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2021
2022 if (is_write) {
2023 if (!memory_access_is_direct(mr, is_write)) {
2024 l = memory_access_size(mr, l, addr1);
2025 /* XXX: could force current_cpu to NULL to avoid
2026 potential bugs */
2027 switch (l) {
2028 case 8:
2029 /* 64 bit write access */
2030 val = ldq_p(buf);
2031 error |= io_mem_write(mr, addr1, val, 8);
2032 break;
2033 case 4:
2034 /* 32 bit write access */
2035 val = ldl_p(buf);
2036 error |= io_mem_write(mr, addr1, val, 4);
2037 break;
2038 case 2:
2039 /* 16 bit write access */
2040 val = lduw_p(buf);
2041 error |= io_mem_write(mr, addr1, val, 2);
2042 break;
2043 case 1:
2044 /* 8 bit write access */
2045 val = ldub_p(buf);
2046 error |= io_mem_write(mr, addr1, val, 1);
2047 break;
2048 default:
2049 abort();
2050 }
2051 } else {
2052 addr1 += memory_region_get_ram_addr(mr);
2053 /* RAM case */
2054 ptr = qemu_get_ram_ptr(addr1);
2055 memcpy(ptr, buf, l);
2056 invalidate_and_set_dirty(addr1, l);
2057 }
2058 } else {
2059 if (!memory_access_is_direct(mr, is_write)) {
2060 /* I/O case */
2061 l = memory_access_size(mr, l, addr1);
2062 switch (l) {
2063 case 8:
2064 /* 64 bit read access */
2065 error |= io_mem_read(mr, addr1, &val, 8);
2066 stq_p(buf, val);
2067 break;
2068 case 4:
2069 /* 32 bit read access */
2070 error |= io_mem_read(mr, addr1, &val, 4);
2071 stl_p(buf, val);
2072 break;
2073 case 2:
2074 /* 16 bit read access */
2075 error |= io_mem_read(mr, addr1, &val, 2);
2076 stw_p(buf, val);
2077 break;
2078 case 1:
2079 /* 8 bit read access */
2080 error |= io_mem_read(mr, addr1, &val, 1);
2081 stb_p(buf, val);
2082 break;
2083 default:
2084 abort();
2085 }
2086 } else {
2087 /* RAM case */
2088 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2089 memcpy(buf, ptr, l);
2090 }
2091 }
2092 len -= l;
2093 buf += l;
2094 addr += l;
2095 }
2096
2097 return error;
2098 }
2099
2100 bool address_space_write(AddressSpace *as, hwaddr addr,
2101 const uint8_t *buf, int len)
2102 {
2103 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2104 }
2105
2106 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2107 {
2108 return address_space_rw(as, addr, buf, len, false);
2109 }
2110
2111
2112 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2113 int len, int is_write)
2114 {
2115 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2116 }
2117
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA = 0,     /* copy the caller's buffer into ROM/RAM */
    FLUSH_CACHE = 1,    /* flush the host icache for the range */
};
2122
2123 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2124 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2125 {
2126 hwaddr l;
2127 uint8_t *ptr;
2128 hwaddr addr1;
2129 MemoryRegion *mr;
2130
2131 while (len > 0) {
2132 l = len;
2133 mr = address_space_translate(as, addr, &addr1, &l, true);
2134
2135 if (!(memory_region_is_ram(mr) ||
2136 memory_region_is_romd(mr))) {
2137 /* do nothing */
2138 } else {
2139 addr1 += memory_region_get_ram_addr(mr);
2140 /* ROM/RAM case */
2141 ptr = qemu_get_ram_ptr(addr1);
2142 switch (type) {
2143 case WRITE_DATA:
2144 memcpy(ptr, buf, l);
2145 invalidate_and_set_dirty(addr1, l);
2146 break;
2147 case FLUSH_CACHE:
2148 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2149 break;
2150 }
2151 }
2152 len -= l;
2153 buf += l;
2154 addr += l;
2155 }
2156 }
2157
2158 /* used for ROM loading : can write in RAM and ROM */
2159 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2160 const uint8_t *buf, int len)
2161 {
2162 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2163 }
2164
2165 void cpu_flush_icache_range(hwaddr start, int len)
2166 {
2167 /*
2168 * This function should do the same thing as an icache flush that was
2169 * triggered from within the guest. For TCG we are always cache coherent,
2170 * so there is no need to flush anything. For KVM / Xen we need to flush
2171 * the host's instruction cache at least.
2172 */
2173 if (tcg_enabled()) {
2174 return;
2175 }
2176
2177 cpu_physical_memory_write_rom_internal(&address_space_memory,
2178 start, NULL, len, FLUSH_CACHE);
2179 }
2180
2181 typedef struct {
2182 MemoryRegion *mr;
2183 void *buffer;
2184 hwaddr addr;
2185 hwaddr len;
2186 } BounceBuffer;
2187
2188 static BounceBuffer bounce;
2189
2190 typedef struct MapClient {
2191 void *opaque;
2192 void (*callback)(void *opaque);
2193 QLIST_ENTRY(MapClient) link;
2194 } MapClient;
2195
2196 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2197 = QLIST_HEAD_INITIALIZER(map_client_list);
2198
2199 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2200 {
2201 MapClient *client = g_malloc(sizeof(*client));
2202
2203 client->opaque = opaque;
2204 client->callback = callback;
2205 QLIST_INSERT_HEAD(&map_client_list, client, link);
2206 return client;
2207 }
2208
2209 static void cpu_unregister_map_client(void *_client)
2210 {
2211 MapClient *client = (MapClient *)_client;
2212
2213 QLIST_REMOVE(client, link);
2214 g_free(client);
2215 }
2216
2217 static void cpu_notify_map_clients(void)
2218 {
2219 MapClient *client;
2220
2221 while (!QLIST_EMPTY(&map_client_list)) {
2222 client = QLIST_FIRST(&map_client_list);
2223 client->callback(client->opaque);
2224 cpu_unregister_map_client(client);
2225 }
2226 }
2227
2228 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2229 {
2230 MemoryRegion *mr;
2231 hwaddr l, xlat;
2232
2233 while (len > 0) {
2234 l = len;
2235 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2236 if (!memory_access_is_direct(mr, is_write)) {
2237 l = memory_access_size(mr, l, addr);
2238 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2239 return false;
2240 }
2241 }
2242
2243 len -= l;
2244 addr += l;
2245 }
2246 return true;
2247 }
2248
2249 /* Map a physical memory region into a host virtual address.
2250 * May map a subset of the requested range, given by and returned in *plen.
2251 * May return NULL if resources needed to perform the mapping are exhausted.
2252 * Use only for reads OR writes - not for read-modify-write operations.
2253 * Use cpu_register_map_client() to know when retrying the map operation is
2254 * likely to succeed.
2255 */
2256 void *address_space_map(AddressSpace *as,
2257 hwaddr addr,
2258 hwaddr *plen,
2259 bool is_write)
2260 {
2261 hwaddr len = *plen;
2262 hwaddr done = 0;
2263 hwaddr l, xlat, base;
2264 MemoryRegion *mr, *this_mr;
2265 ram_addr_t raddr;
2266
2267 if (len == 0) {
2268 return NULL;
2269 }
2270
2271 l = len;
2272 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2273 if (!memory_access_is_direct(mr, is_write)) {
2274 if (bounce.buffer) {
2275 return NULL;
2276 }
2277 /* Avoid unbounded allocations */
2278 l = MIN(l, TARGET_PAGE_SIZE);
2279 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2280 bounce.addr = addr;
2281 bounce.len = l;
2282
2283 memory_region_ref(mr);
2284 bounce.mr = mr;
2285 if (!is_write) {
2286 address_space_read(as, addr, bounce.buffer, l);
2287 }
2288
2289 *plen = l;
2290 return bounce.buffer;
2291 }
2292
2293 base = xlat;
2294 raddr = memory_region_get_ram_addr(mr);
2295
2296 for (;;) {
2297 len -= l;
2298 addr += l;
2299 done += l;
2300 if (len == 0) {
2301 break;
2302 }
2303
2304 l = len;
2305 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2306 if (this_mr != mr || xlat != base + done) {
2307 break;
2308 }
2309 }
2310
2311 memory_region_ref(mr);
2312 *plen = done;
2313 return qemu_ram_ptr_length(raddr + base, plen);
2314 }
2315
2316 /* Unmaps a memory region previously mapped by address_space_map().
2317 * Will also mark the memory as dirty if is_write == 1. access_len gives
2318 * the amount of memory that was actually read or written by the caller.
2319 */
2320 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2321 int is_write, hwaddr access_len)
2322 {
2323 if (buffer != bounce.buffer) {
2324 MemoryRegion *mr;
2325 ram_addr_t addr1;
2326
2327 mr = qemu_ram_addr_from_host(buffer, &addr1);
2328 assert(mr != NULL);
2329 if (is_write) {
2330 while (access_len) {
2331 unsigned l;
2332 l = TARGET_PAGE_SIZE;
2333 if (l > access_len)
2334 l = access_len;
2335 invalidate_and_set_dirty(addr1, l);
2336 addr1 += l;
2337 access_len -= l;
2338 }
2339 }
2340 if (xen_enabled()) {
2341 xen_invalidate_map_cache_entry(buffer);
2342 }
2343 memory_region_unref(mr);
2344 return;
2345 }
2346 if (is_write) {
2347 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2348 }
2349 qemu_vfree(bounce.buffer);
2350 bounce.buffer = NULL;
2351 memory_region_unref(bounce.mr);
2352 cpu_notify_map_clients();
2353 }
2354
2355 void *cpu_physical_memory_map(hwaddr addr,
2356 hwaddr *plen,
2357 int is_write)
2358 {
2359 return address_space_map(&address_space_memory, addr, plen, is_write);
2360 }
2361
2362 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2363 int is_write, hwaddr access_len)
2364 {
2365 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2366 }
2367
2368 /* warning: addr must be aligned */
2369 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2370 enum device_endian endian)
2371 {
2372 uint8_t *ptr;
2373 uint64_t val;
2374 MemoryRegion *mr;
2375 hwaddr l = 4;
2376 hwaddr addr1;
2377
2378 mr = address_space_translate(as, addr, &addr1, &l, false);
2379 if (l < 4 || !memory_access_is_direct(mr, false)) {
2380 /* I/O case */
2381 io_mem_read(mr, addr1, &val, 4);
2382 #if defined(TARGET_WORDS_BIGENDIAN)
2383 if (endian == DEVICE_LITTLE_ENDIAN) {
2384 val = bswap32(val);
2385 }
2386 #else
2387 if (endian == DEVICE_BIG_ENDIAN) {
2388 val = bswap32(val);
2389 }
2390 #endif
2391 } else {
2392 /* RAM case */
2393 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2394 & TARGET_PAGE_MASK)
2395 + addr1);
2396 switch (endian) {
2397 case DEVICE_LITTLE_ENDIAN:
2398 val = ldl_le_p(ptr);
2399 break;
2400 case DEVICE_BIG_ENDIAN:
2401 val = ldl_be_p(ptr);
2402 break;
2403 default:
2404 val = ldl_p(ptr);
2405 break;
2406 }
2407 }
2408 return val;
2409 }
2410
2411 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2412 {
2413 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2414 }
2415
2416 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2417 {
2418 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2419 }
2420
2421 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2422 {
2423 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2424 }
2425
2426 /* warning: addr must be aligned */
2427 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2428 enum device_endian endian)
2429 {
2430 uint8_t *ptr;
2431 uint64_t val;
2432 MemoryRegion *mr;
2433 hwaddr l = 8;
2434 hwaddr addr1;
2435
2436 mr = address_space_translate(as, addr, &addr1, &l,
2437 false);
2438 if (l < 8 || !memory_access_is_direct(mr, false)) {
2439 /* I/O case */
2440 io_mem_read(mr, addr1, &val, 8);
2441 #if defined(TARGET_WORDS_BIGENDIAN)
2442 if (endian == DEVICE_LITTLE_ENDIAN) {
2443 val = bswap64(val);
2444 }
2445 #else
2446 if (endian == DEVICE_BIG_ENDIAN) {
2447 val = bswap64(val);
2448 }
2449 #endif
2450 } else {
2451 /* RAM case */
2452 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2453 & TARGET_PAGE_MASK)
2454 + addr1);
2455 switch (endian) {
2456 case DEVICE_LITTLE_ENDIAN:
2457 val = ldq_le_p(ptr);
2458 break;
2459 case DEVICE_BIG_ENDIAN:
2460 val = ldq_be_p(ptr);
2461 break;
2462 default:
2463 val = ldq_p(ptr);
2464 break;
2465 }
2466 }
2467 return val;
2468 }
2469
2470 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2471 {
2472 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2473 }
2474
2475 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2476 {
2477 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2478 }
2479
2480 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2481 {
2482 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2483 }
2484
2485 /* XXX: optimize */
2486 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2487 {
2488 uint8_t val;
2489 address_space_rw(as, addr, &val, 1, 0);
2490 return val;
2491 }
2492
2493 /* warning: addr must be aligned */
2494 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2495 enum device_endian endian)
2496 {
2497 uint8_t *ptr;
2498 uint64_t val;
2499 MemoryRegion *mr;
2500 hwaddr l = 2;
2501 hwaddr addr1;
2502
2503 mr = address_space_translate(as, addr, &addr1, &l,
2504 false);
2505 if (l < 2 || !memory_access_is_direct(mr, false)) {
2506 /* I/O case */
2507 io_mem_read(mr, addr1, &val, 2);
2508 #if defined(TARGET_WORDS_BIGENDIAN)
2509 if (endian == DEVICE_LITTLE_ENDIAN) {
2510 val = bswap16(val);
2511 }
2512 #else
2513 if (endian == DEVICE_BIG_ENDIAN) {
2514 val = bswap16(val);
2515 }
2516 #endif
2517 } else {
2518 /* RAM case */
2519 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2520 & TARGET_PAGE_MASK)
2521 + addr1);
2522 switch (endian) {
2523 case DEVICE_LITTLE_ENDIAN:
2524 val = lduw_le_p(ptr);
2525 break;
2526 case DEVICE_BIG_ENDIAN:
2527 val = lduw_be_p(ptr);
2528 break;
2529 default:
2530 val = lduw_p(ptr);
2531 break;
2532 }
2533 }
2534 return val;
2535 }
2536
2537 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2538 {
2539 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2540 }
2541
2542 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2543 {
2544 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2545 }
2546
2547 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2548 {
2549 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2550 }
2551
2552 /* warning: addr must be aligned. The ram page is not masked as dirty
2553 and the code inside is not invalidated. It is useful if the dirty
2554 bits are used to track modified PTEs */
2555 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2556 {
2557 uint8_t *ptr;
2558 MemoryRegion *mr;
2559 hwaddr l = 4;
2560 hwaddr addr1;
2561
2562 mr = address_space_translate(as, addr, &addr1, &l,
2563 true);
2564 if (l < 4 || !memory_access_is_direct(mr, true)) {
2565 io_mem_write(mr, addr1, val, 4);
2566 } else {
2567 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2568 ptr = qemu_get_ram_ptr(addr1);
2569 stl_p(ptr, val);
2570
2571 if (unlikely(in_migration)) {
2572 if (cpu_physical_memory_is_clean(addr1)) {
2573 /* invalidate code */
2574 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2575 /* set dirty bit */
2576 cpu_physical_memory_set_dirty_flag(addr1,
2577 DIRTY_MEMORY_MIGRATION);
2578 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2579 }
2580 }
2581 }
2582 }
2583
2584 /* warning: addr must be aligned */
2585 static inline void stl_phys_internal(AddressSpace *as,
2586 hwaddr addr, uint32_t val,
2587 enum device_endian endian)
2588 {
2589 uint8_t *ptr;
2590 MemoryRegion *mr;
2591 hwaddr l = 4;
2592 hwaddr addr1;
2593
2594 mr = address_space_translate(as, addr, &addr1, &l,
2595 true);
2596 if (l < 4 || !memory_access_is_direct(mr, true)) {
2597 #if defined(TARGET_WORDS_BIGENDIAN)
2598 if (endian == DEVICE_LITTLE_ENDIAN) {
2599 val = bswap32(val);
2600 }
2601 #else
2602 if (endian == DEVICE_BIG_ENDIAN) {
2603 val = bswap32(val);
2604 }
2605 #endif
2606 io_mem_write(mr, addr1, val, 4);
2607 } else {
2608 /* RAM case */
2609 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2610 ptr = qemu_get_ram_ptr(addr1);
2611 switch (endian) {
2612 case DEVICE_LITTLE_ENDIAN:
2613 stl_le_p(ptr, val);
2614 break;
2615 case DEVICE_BIG_ENDIAN:
2616 stl_be_p(ptr, val);
2617 break;
2618 default:
2619 stl_p(ptr, val);
2620 break;
2621 }
2622 invalidate_and_set_dirty(addr1, 4);
2623 }
2624 }
2625
2626 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2627 {
2628 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2629 }
2630
2631 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2632 {
2633 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2634 }
2635
2636 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2637 {
2638 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2639 }
2640
2641 /* XXX: optimize */
2642 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2643 {
2644 uint8_t v = val;
2645 address_space_rw(as, addr, &v, 1, 1);
2646 }
2647
2648 /* warning: addr must be aligned */
2649 static inline void stw_phys_internal(AddressSpace *as,
2650 hwaddr addr, uint32_t val,
2651 enum device_endian endian)
2652 {
2653 uint8_t *ptr;
2654 MemoryRegion *mr;
2655 hwaddr l = 2;
2656 hwaddr addr1;
2657
2658 mr = address_space_translate(as, addr, &addr1, &l, true);
2659 if (l < 2 || !memory_access_is_direct(mr, true)) {
2660 #if defined(TARGET_WORDS_BIGENDIAN)
2661 if (endian == DEVICE_LITTLE_ENDIAN) {
2662 val = bswap16(val);
2663 }
2664 #else
2665 if (endian == DEVICE_BIG_ENDIAN) {
2666 val = bswap16(val);
2667 }
2668 #endif
2669 io_mem_write(mr, addr1, val, 2);
2670 } else {
2671 /* RAM case */
2672 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2673 ptr = qemu_get_ram_ptr(addr1);
2674 switch (endian) {
2675 case DEVICE_LITTLE_ENDIAN:
2676 stw_le_p(ptr, val);
2677 break;
2678 case DEVICE_BIG_ENDIAN:
2679 stw_be_p(ptr, val);
2680 break;
2681 default:
2682 stw_p(ptr, val);
2683 break;
2684 }
2685 invalidate_and_set_dirty(addr1, 2);
2686 }
2687 }
2688
2689 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2690 {
2691 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2692 }
2693
2694 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2695 {
2696 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2697 }
2698
2699 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2700 {
2701 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2702 }
2703
2704 /* XXX: optimize */
2705 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 {
2707 val = tswap64(val);
2708 address_space_rw(as, addr, (void *) &val, 8, 1);
2709 }
2710
2711 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2712 {
2713 val = cpu_to_le64(val);
2714 address_space_rw(as, addr, (void *) &val, 8, 1);
2715 }
2716
2717 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2718 {
2719 val = cpu_to_be64(val);
2720 address_space_rw(as, addr, (void *) &val, 8, 1);
2721 }
2722
2723 /* virtual memory access for debug (includes writing to ROM) */
2724 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2725 uint8_t *buf, int len, int is_write)
2726 {
2727 int l;
2728 hwaddr phys_addr;
2729 target_ulong page;
2730
2731 while (len > 0) {
2732 page = addr & TARGET_PAGE_MASK;
2733 phys_addr = cpu_get_phys_page_debug(cpu, page);
2734 /* if no physical page mapped, return an error */
2735 if (phys_addr == -1)
2736 return -1;
2737 l = (page + TARGET_PAGE_SIZE) - addr;
2738 if (l > len)
2739 l = len;
2740 phys_addr += (addr & ~TARGET_PAGE_MASK);
2741 if (is_write) {
2742 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2743 } else {
2744 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2745 }
2746 len -= l;
2747 buf += l;
2748 addr += l;
2749 }
2750 return 0;
2751 }
2752 #endif
2753
2754 #if !defined(CONFIG_USER_ONLY)
2755
/*
 * Helper for the virtio device model to find out whether it is running
 * on a big endian target.  The answer is fixed at compile time by
 * TARGET_WORDS_BIGENDIAN.  Per the original author: the device model is
 * _utterly broken_ in needing this - don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool target_is_be = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    target_is_be = true;
#endif
    return target_is_be;
}
2769
2770 #endif
2771
2772 #ifndef CONFIG_USER_ONLY
2773 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2774 {
2775 MemoryRegion*mr;
2776 hwaddr l = 1;
2777
2778 mr = address_space_translate(&address_space_memory,
2779 phys_addr, &phys_addr, &l, false);
2780
2781 return !(memory_region_is_ram(mr) ||
2782 memory_region_is_romd(mr));
2783 }
2784
2785 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2786 {
2787 RAMBlock *block;
2788
2789 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2790 func(block->host, block->offset, block->length, opaque);
2791 }
2792 }
2793 #endif